Merge branch 'develop' into type-string-driver

1b098fd7 · Paul Fultz II · GitHub · 05f2ee1c · c0398ded · 1b098fd7
Unverified Commit 1b098fd7 authored Jun 21, 2022 by Paul Fultz II Committed by GitHub Jun 21, 2022
20 changed files
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
+project(migraphx-doc)
+find_package(ROCM REQUIRED)
-include(DoxygenDoc)
+include(ROCMDoxygenDoc)
-set(DOXYGEN_OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/doxygen/)
+set(DOXYGEN_OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/doxygen)
-add_doxygen_doc(
+rocm_add_doxygen_doc(
    OUTPUT_DIRECTORY ${DOXYGEN_OUTPUT}
    INPUT 
-        ${CMAKE_CURRENT_SOURCE_DIR}/../src
+        ${CMAKE_SOURCE_DIR}/src
    INCLUDE_PATH
-        ${CMAKE_CURRENT_SOURCE_DIR}/../src/include
+        ${CMAKE_SOURCE_DIR}/src/include
-        ${CMAKE_CURRENT_SOURCE_DIR}/../src/targets/cpu/include
+        ${CMAKE_SOURCE_DIR}/src/targets/cpu/include
-        ${CMAKE_CURRENT_SOURCE_DIR}/../src/targets/gpu/include
+        ${CMAKE_SOURCE_DIR}/src/targets/gpu/include
+    STRIP_FROM_INC_PATH
+        ${CMAKE_SOURCE_DIR}/src/include
+        ${CMAKE_SOURCE_DIR}/src/targets/cpu/include
+        ${CMAKE_SOURCE_DIR}/src/targets/gpu/include
+    EXCLUDE_PATTERNS
+        ${CMAKE_SOURCE_DIR}/src/targets/gpu/kernels
+        ${CMAKE_SOURCE_DIR}/src/targets/gpu/device
    SEARCH_INCLUDES YES
    MACRO_EXPANSION YES
    RECURSIVE YES
@@ -29,26 +38,23 @@ add_doxygen_doc(
    EXTRACT_ALL YES
    ENUM_VALUES_PER_LINE 1
    FULL_PATH_NAMES YES
+    WARN_LOGFILE "${DOXYGEN_OUTPUT}/DoxygenWarningLog.txt"
    PREDEFINED DOXYGEN
 )
-add_custom_target(remove_inline_ns 
-    sed -i "s/MIGRAPHX_INLINE_NS:://g" *.xml 
-    WORKING_DIRECTORY ${DOXYGEN_OUTPUT}/xml)
-add_dependencies(remove_inline_ns doxygen)
-include(SphinxDoc)
+include(ROCMSphinxDoc)
-add_sphinx_doc(src 
+rocm_add_sphinx_doc(src 
    BUILDER html 
-    OUTPUT_DIR html 
+    OUTPUT_DIR html
    VARS 
        breathe_projects.proj=${DOXYGEN_OUTPUT}/xml
        breathe_default_project=proj
-    DEPENDS doxygen remove_inline_ns
+    DEPENDS doxygen
 )
 find_package(LATEX)
 if(LATEX_FOUND)
-    add_sphinx_doc(src 
+    rocm_add_sphinx_doc(src 
        BUILDER latex
        OUTPUT_DIR pdf
        VARS 
@@ -57,6 +63,6 @@ if(LATEX_FOUND)
        DEPENDS doxygen
    )
 else()
-    message("Latex builder not found. Latex builder is required only for building the PDF documentation for MIGraph and is not necessary for building the library, or any other components. To build PDF documentation run make in ${CMAKE_CURRENT_SOURCE_DIR}/pdf, once a latex builder is installed.")
+    message("Latex builder not found. Latex builder is required only for building the PDF documentation for MIGraphX and is not necessary for building the library, or any other components. To build PDF documentation run make in ${CMAKE_CURRENT_SOURCE_DIR}/pdf, once a latex builder is installed.")
 endif()
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
-sphinx==1.6.2
+docutils==0.17.1
-breathe==4.9.1
+sphinx==4.2.0
+breathe==4.31.0
+sphinx_rtd_theme==1.0.0
 # git+https://github.com/arximboldi/breathe@fix-node-parent
--- a/doc/src/conf.py
+++ b/doc/src/conf.py
@@ -18,6 +18,8 @@
 #
 # import os
 # import sys
+from datetime import date
+import re
 # sys.path.insert(0, os.path.abspath('.'))
 # -- General configuration ------------------------------------------------
@@ -29,7 +31,11 @@
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = ['breathe', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode']
+extensions = [
+    'breathe', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', 'sphinx_rtd_theme',
+    'sphinx.ext.autosectionlabel'
+]
+autosectionlabel_prefix_document = True
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -45,7 +51,7 @@ master_doc = 'index'
 # General information about the project.
 project = u'MIGraphX'
-copyright = u'2018, AMD'
+copyright = u'2018-{}, AMD'.format(date.today().year)
 author = u'AMD'
 # The version info for the project you're documenting, acts as replacement for
@@ -53,9 +59,12 @@ author = u'AMD'
 # built documents.
 #
 # The short X.Y version.
-version = u'0.1'
+with open('../../CMakeLists.txt') as file:
+    version = next((re.findall('[0-9.]+', line)[0]
+                    for line in file.readlines()
+                    if 'rocm_setup_version' in line))
 # The full version, including alpha/beta/rc tags.
-release = u'0.1'
+release = version
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -82,7 +91,7 @@ todo_include_todos = False
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'alabaster'
+html_theme = 'sphinx_rtd_theme'
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the

--- a/doc/src/contributor_guide.rst
+++ b/doc/src/contributor_guide.rst
+Contributor Guide
+=================
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+   dev_intro
+   dev/data
+   dev/operators
+   dev/program
+   dev/targets
+   dev/quantization
+   dev/pass
+   dev/matchers
+   dev/tools
--- a/doc/src/developer_guide.rst
+++ b/doc/src/developer_guide.rst
-Developer Guide
+C++ User Guide
-===============
+==============
 .. toctree::
   :maxdepth: 2
   :caption: Contents:
-   dev/matchers
+   reference/cpp
--- a/doc/src/dev/data.rst
+++ b/doc/src/dev/data.rst
+Data types
+==========
+shape
+-----
+.. doxygenstruct:: migraphx::internal::shape
+literal
+-------
+.. doxygenstruct:: migraphx::internal::literal
+argument
+--------
+.. doxygenstruct:: migraphx::internal::argument
+raw_data
+--------
+.. doxygenstruct:: migraphx::internal::raw_data
+.. doxygenfunction:: migraphx::internal::visit_all
+tensor_view
+-----------
+.. doxygenstruct:: migraphx::internal::tensor_view
--- a/doc/src/dev/operators.rst
+++ b/doc/src/dev/operators.rst
+Operators
+=========
+operation
+---------
+.. doxygenstruct:: migraphx::internal::operation
+.. doxygenfunction:: migraphx::internal::is_context_free
+.. doxygenfunction:: migraphx::internal::has_finalize
+operators
+---------
+.. doxygennamespace:: migraphx::internal::op
--- a/doc/src/dev/pass.rst
+++ b/doc/src/dev/pass.rst
+Passes
+======
+pass
+----
+.. doxygenstruct:: migraphx::internal::pass
+dead_code_elimination
+---------------------
+.. doxygenstruct:: migraphx::internal::dead_code_elimination
+eliminate_common_subexpression
+------------------------------
+.. doxygenstruct:: migraphx::internal::eliminate_common_subexpression
+eliminate_concat
+----------------
+.. doxygenstruct:: migraphx::internal::eliminate_concat
+eliminate_contiguous
+--------------------
+.. doxygenstruct:: migraphx::internal::eliminate_contiguous
+eliminate_identity
+------------------
+.. doxygenstruct:: migraphx::internal::eliminate_identity
+eliminate_pad
+-------------
+.. doxygenstruct:: migraphx::internal::eliminate_pad
+propagate_constant
+------------------
+.. doxygenstruct:: migraphx::internal::propagate_constant
+rewrite_batchnorm
+-----------------
+.. doxygenstruct:: migraphx::internal::rewrite_batchnorm
+rewrite_rnn
+-----------
+.. doxygenstruct:: migraphx::internal::rewrite_rnn
+schedule
+--------
+.. doxygenstruct:: migraphx::internal::schedule
+simplify_algebra
+----------------
+.. doxygenstruct:: migraphx::internal::simplify_algebra
+simplify_reshapes
+-----------------
+.. doxygenstruct:: migraphx::internal::simplify_reshapes
--- a/doc/src/dev/program.rst
+++ b/doc/src/dev/program.rst
+Program
+=======
+instruction
+-----------
+.. doxygenstruct:: migraphx::internal::instruction
+instruction_ref
+---------------
+.. cpp:type:: migraphx::internal::instruction_ref
+    References an instruction in the program.
+program
+-------
+.. doxygenstruct:: migraphx::internal::program
+parse_onnx
+----------
+.. doxygenfunction:: migraphx::internal::parse_onnx
+parse_tf
+--------
+.. doxygenfunction:: migraphx::internal::parse_tf
+onnx_options
+------------
+.. doxygenstruct:: migraphx::internal::onnx_options
+tf_options
+----------
+.. doxygenstruct:: migraphx::internal::tf_options
--- a/doc/src/dev/quantization.rst
+++ b/doc/src/dev/quantization.rst
+Quantization
+============
+quantize_fp16
+-------------
+.. doxygenfunction:: migraphx::internal::quantize_fp16
+quantize_int8
+-------------
+.. doxygenfunction:: migraphx::internal::quantize_int8
--- a/doc/src/dev/roctx1.jpg
+++ b/doc/src/dev/roctx1.jpg
--- a/doc/src/dev/roctx2.jpg
+++ b/doc/src/dev/roctx2.jpg
--- a/doc/src/reference/targets.rst
+++ b/doc/src/reference/targets.rst
@@ -4,15 +4,15 @@ Targets
 target
 ------
-.. doxygenstruct:: migraphx::target
+.. doxygenstruct:: migraphx::internal::target
 gpu::target
 -----------
-.. doxygenstruct:: migraphx::gpu::target
+.. doxygenstruct:: migraphx::internal::gpu::target
 cpu::target
 -----------
-.. doxygenstruct:: migraphx::cpu::target
+.. doxygenstruct:: migraphx::internal::cpu::target
--- a/doc/src/dev/tools.rst
+++ b/doc/src/dev/tools.rst
+Tools
+=====
+roctx.py
+--------
+The MIGraphX driver provides a `roctx` command which can be used with the `rocprof` binary to get marker timing information for each MIGraphX operator.
+In order to help the user process timing information, a rocTX helper script is provided at `tools/roctx.py`.
+The `roctx.py` helper script provides two main functions: `run` and `parse`. Available knobs and usage are given below:
+::
+    Usage: roctx.py [-h] [--json-path json_path] [--out out]
+    [--study-name study-name] [--repeat repeat] [--parse]
+    [--run run] [--debug]
+.. option::  --run
+Runs the `migraphx-driver roctx` command with the given `migraphx-driver` knobs, and then parses the results, providing GPU kernel timing information.
+MIGraphX knobs can be given via a string to `--run` knob. Please see the examples below.
+.. option::  --parse
+Given `--json-path`, parses JSON file and provides GPU kernel timing information.
+.. option::  --out
+Output folder
+.. option::  --study-name
+Optional. Allows user to name a study for easier interpretation. Defaults to timestamp.
+.. option::  --repeat
+Number of iterations. Set to **2** by default.
+.. option::  --debug
+Provides additional debug information related to data. Only use for debugging purposes.
+**Examples:**
+**Running inference with rocTX for a given ONNX file:**
+::
+    python roctx.py --run '--onnx --gpu fcn-resnet50-11.onnx' --out output_folder --repeat 5
+After a run, output similar to that given below is expected at the terminal. The output will provide `SUM`, `MIN`, `MAX` and `COUNT` information for each kernel executed for a given model.
+Average total time is also provided. There are three files provided for reference:
+1. `OUTPUT CSV FILE` provides a summary of the run, providing utilized MIGraphX knobs and related kernel timing information
+2. `KERNEL TIMING DETAILS` provides the hotspot kernel timing information
+3. This will provide all output data related to all iterations executed during a run.
+An example output:
+.. image:: ./roctx1.jpg
+Hotspot kernel timing information:
+.. image:: ./roctx2.jpg
+**Parsing an already existing JSON file:**
+::
+    python roctx.py --parse --json-path ../trace.json
\ No newline at end of file
--- a/doc/src/dev_intro.rst
+++ b/doc/src/dev_intro.rst
+MIGraphX Fundamentals
+======================
+MIGraphX provides an optimized execution engine for deep learning neural networks.
+We will cover some simple operations in the MIGraphX framework here.
+For a quick start guide to using MIGraphX, look in the examples directory: ``https://github.com/ROCmSoftwarePlatform/AMDMIGraphX/tree/develop/examples/migraphx``.
+Location of the Examples
+-------------------------
+The ``ref_dev_examples.cpp`` can be found in the test directory (``/test``).
+The executable file ``test_ref_dev_examples`` based on this file will be created in the ``bin/`` of the build directory after running ``make -j$(nproc) test_ref_dev_examples``.
+The executable will also be created when running ``make -j$(nproc) check``, alongside all the other tests.
+Directions for building MIGraphX from source can be found in the main README file: ``https://github.com/ROCmSoftwarePlatform/AMDMIGraphX#readme``.
+Adding Two Literals
+--------------------
+A program is a collection of modules, which are collections of instructions to be executed when calling `eval <migraphx::program::eval>`.
+Each instruction has an associated `operation <migraphx::operation>` which represents the computation to be performed by the instruction.
+We start with a snippet of the simple ``add_two_literals()`` function::
+    // create the program and get a pointer to the main module
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+    // add two literals to the program
+    auto one = mm->add_literal(1);
+    auto two = mm->add_literal(2);
+    // make the add operation between the two literals and add it to the program
+    mm->add_instruction(migraphx::make_op("add"), one, two);
+    // compile the program on the reference device
+    p.compile(migraphx::ref::target{});
+    // evaluate the program and retrieve the result
+    auto result = p.eval({}).back();
+    std::cout << "add_two_literals: 1 + 2 = " << result << "\n";
+We start by creating a simple ``migraphx::program`` object and then getting a pointer to the main module of it.
+The program is a collection of ``modules`` that start executing from the main module, so instructions are added to the modules rather than directly onto the program object.
+We then use the `add_literal <migraphx::program::add_literal>` function to add an instruction that stores the literal number ``1`` while returning an `instruction_ref <migraphx::instruction_ref>`.
+The returned `instruction_ref <migraphx::instruction_ref>` can be used in another instruction as an input.
+We use the same `add_literal <migraphx::program::add_literal>` function to add a ``2`` to the program.
+After creating the literals, we then create the instruction to add the numbers together.
+This is done by using the `add_instruction <migraphx::program::add_instruction>` function with the ``"add"`` `operation <migraphx::program::operation>` created by `make_op <migraphx::program::make_op>` along with the previous `add_literal` `instruction_ref <migraphx::instruction_ref>` for the input arguments of the instruction.
+Finally, we can run this `program <migraphx::program>` by compiling it for the reference target (CPU) and then running it with `eval <migraphx::program::eval>`.
+The result is then retrieved and printed to the console.
+We can compile the program for the GPU as well, but the file will have to be moved to the ``test/gpu/`` directory and the correct target must be included::
+    #include <migraphx/gpu/target.hpp>
+Using Parameters
+-----------------
+The previous program will always produce the same value of adding ``1`` and ``2``.
+In the next program we want to pass an input to a program and compute a value based on the input.
+We can modify the program to take an input parameter ``x``, as seen in the ``add_parameter()`` function::
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+    migraphx::shape s{migraphx::shape::int32_type, {1}};
+    // add a "x" parameter with the shape s
+    auto x   = mm->add_parameter("x", s);
+    auto two = mm->add_literal(2);
+    // add the "add" instruction between the "x" parameter and "two" to the module
+    mm->add_instruction(migraphx::make_op("add"), x, two);
+    p.compile(migraphx::ref::target{});
+This adds a parameter of type ``int32``, and compiles it for the CPU.
+To run the program, we need to pass the parameter as a ``parameter_map`` when we call `eval <migraphx::program::eval>`.
+We create the ``parameter_map`` by setting the ``x`` key to an `argument <migraphx::argument>` object with an ``int`` data type::
+    // create a parameter_map object for passing a value to the "x" parameter
+    std::vector<int> data = {4};
+    migraphx::parameter_map params;
+    params["x"] = migraphx::argument(s, data.data());
+    auto result = p.eval(params).back();
+    std::cout << "add_parameters: 4 + 2 = " << result << "\n";
+    EXPECT(result.at<int>() == 6);
+Handling Tensor Data
+---------------------
+In the previous examples we have only been dealing with scalars, but the `shape <migraphx::shape>` class can describe multi-dimensional tensors.
+For example, we can compute a simple convolution::
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+    // create shape objects for the input tensor and weights
+    migraphx::shape input_shape{migraphx::shape::float_type, {2, 3, 4, 4}};
+    migraphx::shape weights_shape{migraphx::shape::float_type, {3, 3, 3, 3}};
+    // create the parameters and add the "convolution" operation to the module
+    auto input   = mm->add_parameter("X", input_shape);
+    auto weights = mm->add_parameter("W", weights_shape);
+    mm->add_instruction(migraphx::make_op("convolution", {{"padding", {1, 1}}, {"stride", {2, 2}}}), input, weights);
+Here we create two parameters for both the ``input`` and ``weights``.
+In the previous examples, we created simple literals, however, most programs will take data from allocated buffers (usually on the GPU).
+In this case, we can create `argument <migraphx::argument>` objects directly from the pointers to the buffers::
+    // Compile the program
+    p.compile(migraphx::ref::target{});
+    // Allocated buffers by the user
+    std::vector<float> a = ...;
+    std::vector<float> c = ...;
+    // Solution vector
+    std::vector<float> sol = ...;
+    // Create the arguments in a parameter_map
+    migraphx::parameter_map params;
+    params["X"] = migraphx::argument(input_shape, a.data());
+    params["W"] = migraphx::argument(weights_shape, c.data());
+    // Evaluate and confirm the result
+    auto result = p.eval(params).back();
+    std::vector<float> results_vector(64);
+    result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
+    EXPECT(migraphx::verify_range(results_vector, sol));
+An `argument <migraphx::argument>` can handle memory buffers from either the GPU or the CPU.
+By default when running the `program <migraphx::program>`, buffers are allocated on the corresponding target.
+When compiling for the CPU, the buffers by default will be allocated on the CPU.
+When compiling for the GPU, the buffers by default will be allocated on the GPU.
+With the option ``offload_copy=true`` set while compiling for the GPU, the buffers will be located on the CPU.
+Importing From ONNX
+--------------------
+A `program <migraphx::program>` can be built directly from an onnx file using the MIGraphX ONNX parser.
+This makes it easier to use neural networks directly from other frameworks.
+In this case, there is a ``parse_onnx`` function::
+    program p = migraphx::parse_onnx("model.onnx");
+    p.compile(migraphx::gpu::target{});
--- a/doc/src/driver.rst
+++ b/doc/src/driver.rst
+MIGraphX Driver
+===============
+read
+----
+.. program:: migraphx-driver read
+Loads and prints input graph.
+.. include:: ./driver/read.rst
+compile
+-------
+.. program:: migraphx-driver compile
+Compiles and prints input graph.
+.. include:: ./driver/compile.rst
+run
+---
+.. program:: migraphx-driver run
+Compiles, allocates parameters, evaluates, and prints input graph.
+.. include:: ./driver/compile.rst
+perf
+----
+.. program:: migraphx-driver perf
+Compiles and runs input graph then prints performance report.
+.. include:: ./driver/compile.rst
+.. option::  --iterations, -n [unsigned int]
+Number of iterations to run for perf report (Default: 100)
+verify
+------
+.. program:: migraphx-driver verify
+Runs reference and CPU or GPU implementations and checks outputs for consistency.
+.. include:: ./driver/compile.rst
+.. option::  --tolerance [double]
+Tolerance for errors (Default: 80)
+.. option::  -i, --per-instruction
+Verify each instruction
+.. option::  -r, --reduce
+Reduce program and verify
+roctx
+-----
+.. program:: migraphx-driver roctx
+Provides marker information for each operation, allowing MIGraphX to be used with `rocprof <https://rocmdocs.amd.com/en/latest/ROCm_Tools/ROCm-Tools.html>`_ for performance analysis.
+This allows the user to get GPU-level kernel timing information.
+An example command line combined with rocprof for tracing purposes is given below:
+.. code-block:: bash
+    /opt/rocm/bin/rocprof --hip-trace --roctx-trace --flush-rate 1ms --timestamp on -d <OUTPUT_PATH> --obj-tracking on /opt/rocm/bin/migraphx-driver roctx <ONNX_FILE> <MIGRAPHX_OPTIONS>
+After `rocprof` is run, the output directory will contain trace information for HIP, HCC and ROCTX in separate `.txt` files.
+To understand the interactions between API calls, it is recommended to utilize the `roctx.py` helper script as described in the :ref:`dev/tools:roctx.py` section.
+.. include:: ./driver/compile.rst
\ No newline at end of file
--- a/doc/src/driver/compile.rst
+++ b/doc/src/driver/compile.rst
+.. include:: ./driver/read.rst
+.. option::  --fill0 [std::vector<std::string>]
+Fill parameter with 0s
+.. option::  --fill1 [std::vector<std::string>]
+Fill parameter with 1s
+.. option::  --gpu
+Compile on the gpu
+.. option::  --cpu
+Compile on the cpu
+.. option::  --ref
+Compile on the reference implementation
+.. option::  --enable-offload-copy
+Enable implicit offload copying
+.. option::  --disable-fast-math
+Disable fast math optimization
+.. option::  --fp16
+Quantize for fp16
+.. option::  --int8
+Quantize for int8
--- a/doc/src/driver/read.rst
+++ b/doc/src/driver/read.rst
+.. option::  <input file>
+File to load
+.. option::  --model [resnet50|inceptionv3|alexnet]
+Load model
+.. option::  --onnx
+Load as onnx
+.. option::  --tf
+Load as tensorflow
+.. option::  --migraphx
+Load as MIGraphX
+.. option::  --migraphx-json
+Load as MIGraphX JSON
+.. option::  --batch [unsigned int] (Default: 1)
+Set batch size for model
+.. option::  --nhwc
+Treat tensorflow format as nhwc
+.. option::  --skip-unknown-operators
+Skip unknown operators when parsing and continue to parse.
+.. option::  --nchw
+Treat tensorflow format as nchw
+.. option::  --trim, -t [unsigned int]
+Trim instructions from the end (Default: 0)
+.. option::  --input-dim [std::vector<std::string>]
+Dim of a parameter (format: "@name d1 d2 dn")
+.. option::  --optimize, -O
+Optimize when reading
+.. option::  --graphviz, -g
+Print out a graphviz representation.
+.. option::  --brief
+Make the output brief.
+.. option::  --cpp
+Print out the program as cpp program.
+.. option::  --json
+Print out program as json.
+.. option::  --text
+Print out program in text format.
+.. option::  --binary
+Print out program in binary format.
+.. option::  --output, -o [std::string]
+Output to file.
--- a/doc/src/index.rst
+++ b/doc/src/index.rst
@@ -10,8 +10,10 @@ Welcome to AMD MIGraphX's documentation!
   :maxdepth: 3
   :caption: Contents:
-   user_guide
+   py_user_guide
-   developer_guide
+   cpp_user_guide
+   driver
+   contributor_guide
 Indices and tables

--- a/doc/src/overview.rst
+++ b/doc/src/overview.rst
-Overview
-========
-MIGraphX provides an optimized execution engine for deep learning neural networks.
-Building a program
------------------
-A program consists of a set of instructions to be executed when calling `eval <migraphx::program::eval>`. Each instruction has an associated `operation <migraphx::operation>` which represents the computation to be performed by the instruction.
-We can start by building a simple program to add two numbers together::
-    program p;
-    instruction_ref one = p.add_literal(1);
-    instruction_ref two = p.add_literal(2);
-    p.add_instruction(add{}, one, two);
-The `add_literal <migraphx::program::add_literal>` function will add an instruction to the program to store a literal number. The `instruction_ref <migraphx::instruction_ref>` is a reference to the instruction in the program, which can be used to compose the output of the instruction with another instruction.
-After creating the literals, we then create the instruction to add the numbers together. This is done by using the `add{} <migraphx::add>` operation class along with the `instruction_ref <migraphx::instruction_ref>` for the input arguments of the instruction.
-Finally, we can run this `program <migraphx::program>` by compiling it for the cpu and then running it with `eval <migraphx::program::eval>`::
-    p.compile(cpu::target{});
-    argument result = p.eval({});
-The easiest way to see the result is to print it::
-    std::cout << result;
-Which will print ``3``.
-We can also compile the program for the gpu as well.
-Adding parameters
-----------------
-Of course, this program will always produce the same value which is quite uninteresting. Instead, we want to pass an input to a program and compute a value based on the input. This can be done with a parameter. For example, we can modify the program to take an input ``x``::
-    program p;
-    instruction_ref x = p.add_parameter("x", {shape::int64_type});
-    instruction_ref two = p.add_literal(2);
-    p.add_instruction(add{}, x, two);
-    p.compile(cpu::target{});
-This adds a parameter of type ``int64``, and compiles it for the ``cpu``. To run the program, we need to pass the parameter to it when we call `eval <migraphx::program::eval>`::
-    argument result = p.eval({
-        {"x", literal{1}.get_argument()}
-    });
-    std::cout << result;
-This will print ``3``.
-A parameter is given as an `argument <migraphx::argument>`. In this case, the simplest way of creating an `argument <migraphx::argument>` is from a `literal <migraphx::literal>`.
-Tensor data
-----------
-In this example we are just creating numbers, but the `shape <migraphx::shape>` class can describe multi-dimensional tensors. For example, we can build a simple network with convolution and relu::
-    program p;
-    instruction_ref input = p.add_parameter("x", shape{shape::float_type, {1, 3, 32, 32}});
-    instruction_ref weights = p.add_parameter("w", shape{shape::float_type, {1, 3, 5, 5}});
-    instruction_ref conv = p.add_instruction(convolution{}, input, weights);
-    p.add_instruction(activation{"relu"}, conv);
-Here we create two parameters for both the ``input`` and ``weights``. In the previous examples, we just created simple literals, however, most programs will take data from already allocated buffers(usually on the GPU). In this case, we can create `argument <migraphx::argument>` objects directly from the pointers to the buffers::
-    // Compile the program
-    p.compile(gpu::target{});
-    // Allocated buffers by the user
-    float* input = ...;
-    float* weights = ...;
-    // Create the arguments
-    argument input_arg{shape{shape::float_type, {1, 3, 32, 32}}, input};
-    argument weights_arg{shape{shape::float_type, {1, 3, 32, 32}}, weights};
-    p.eval({{"x", input_arg}, {"w", weights_arg}})
-An `argument <migraphx::argument>` can handle memory buffers from either the GPU or the CPU, but when running the `program <migraphx::program>`, buffers should be allocated for the corresponding target. That is, when compiling for the CPU, the buffers should be allocated on the CPU, and when compiling for the GPU the buffers should be allocated on the GPU.
-Importing from onnx
-------------------
-A `program <migraphx::program>` can be built directly from an onnx file, which makes it easier to use neural networks directly from other frameworks. In this case, there is an ``parse_onnx`` function::
-    program p = parse_onnx("model.onnx");
-    p.compile(gpu::target{});