Commit baac1dab authored by Alan Turner

Merge remote-tracking branch 'origin/develop' into ck-host-lib

parents 830dff7a 77042e30
@@ -28,6 +28,10 @@ Enable implicit offload copying
Disable fast math optimization
.. option:: --exhaustive-tune
Perform an exhaustive search to find the fastest version of generated kernels for the selected backend
.. option:: --fp16
Quantize for fp16
...
@@ -24,7 +24,7 @@ Load as MIGraphX JSON
.. option:: --batch [unsigned int] (Default: 1)
For a static model, sets the batch size. For a dynamic batch model, sets the batch size used at runtime.
.. option:: --nhwc
@@ -46,6 +46,14 @@ Trim instructions from the end (Default: 0)
Dim of a parameter (format: "@name d1 d2 dn")
.. option:: --dyn-input-dim [std::vector<std::string>]
Set dynamic dimensions of a parameter using JSON formatting (format: "@name" "dynamic_dimension_json")
.. option:: --default-dyn-dim
Set the default dynamic dimension (format: {min:x, max:y, optimals:[o1,o2,...]})
.. option:: --optimize, -O
Optimize when reading
...
@@ -3,18 +3,10 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
AMD MIGraphX documentation
==========================
.. toctree::
:maxdepth: 3
:caption: Contents:
py_user_guide
cpp_user_guide
driver
contributor_guide
AMD MIGraphX is AMD's graph inference engine that accelerates machine learning model inference.
Indices and tables
==================
...
@@ -6,9 +6,9 @@ Python Reference
shape
-----
.. py:class:: shape(type, lens, strides=None, dyn_dims)
Describes the shape of a tensor. This includes size, layout, and data type. A shape can be made dynamic by supplying dyn_dims.
.. py:method:: type()
@@ -34,6 +34,12 @@ shape
:rtype: int
.. py:method:: dyn_dims()
The dynamic dimensions of the shape.
:rtype: list[dynamic_dimension]
.. py:method:: bytes()
The number of bytes the shape uses.
@@ -46,6 +52,12 @@ shape
:rtype: int
.. py:method:: ndim()
The number of dimensions for the shape.
:rtype: int
.. py:method:: packed()
Returns true if the shape is packed.
@@ -64,6 +76,12 @@ shape
:rtype: bool
.. py:method:: dynamic()
Returns true if the shape is dynamic.
:rtype: bool
.. py:method:: standard()
Returns true if the shape is a standard shape. That is, the shape is both packed and not transposed.
@@ -76,6 +94,18 @@ shape
:rtype: bool
dynamic_dimension
-----------------
.. py:class:: dynamic_dimension(min, max, optimals)
Construct a dynamic_dimension from a minimum, a maximum, and optionally a set of optimals.
.. py:method:: is_fixed()
Returns true if the dynamic_dimension is fixed.
:rtype: bool
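The dynamic_dimension semantics documented above (a [min, max] range with optional optimals, fixed when min equals max) can be illustrated with a plain-Python sketch; the class and method names below mirror the documented behaviour but are hypothetical, not the actual migraphx binding:

```python
# Hypothetical plain-Python mirror of dynamic_dimension semantics;
# the real class lives in the migraphx Python module.
class DynamicDimension:
    def __init__(self, min, max, optimals=None):
        self.min = min
        self.max = max
        self.optimals = optimals or []

    def is_fixed(self):
        # A dynamic_dimension is fixed when min == max.
        return self.min == self.max

    def within(self, value):
        # True if a runtime dimension value falls in [min, max].
        return self.min <= value <= self.max

batch = DynamicDimension(1, 10, [1, 4, 10])
print(batch.is_fixed())                     # a real range, so not fixed
print(DynamicDimension(3, 3).is_fixed())    # min == max, so fixed
```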
argument
--------
@@ -215,13 +245,14 @@ program
:rtype: list[shape]
.. py:method:: compile(t, offload_copy=True, fast_math=True, exhaustive_tune=False)
Compiles the program for the target and optimizes it.
:param target t: The target to compile the program for.
:param bool offload_copy: For targets with offloaded memory (such as the gpu), this inserts instructions during compilation to copy the input parameters to the offloaded memory and to copy the final result from the offloaded memory back to main memory.
:param bool fast_math: Optimize math functions to use faster approximate versions. There may be slight accuracy degradation when enabled.
:param bool exhaustive_tune: Enable an exhaustive search to find the fastest version of generated kernels for the selected backend.
.. py:method:: get_main_module()
@@ -291,8 +322,10 @@ parse_onnx
Load and parse an onnx file.
:param str filename: Path to file.
:param str default_dim_value: default dimension to use (if not specified in onnx file).
:param dynamic_dimension default_dyn_dim_value: default dynamic_dimension value to use.
:param str map_input_dims: Explicitly specify the dims of an input.
:param list[dynamic_dimension] map_dyn_input_dims: Explicitly specify the dynamic_dimensions of an input.
:param str skip_unknown_operators: Continue parsing onnx file if an unknown operator is found.
:param str print_program_on_error: Print program if an error occurs.
:param int max_loop_iterations: Maximum iteration number for the loop operator.
...
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
cmake_minimum_required(VERSION 3.5)
project (cpp_dynamic_batch)
set (CMAKE_CXX_STANDARD 14)
set (EXAMPLE dynamic_batch)
list (APPEND CMAKE_PREFIX_PATH /opt/rocm)
find_package (migraphx)
message("source file: " ${EXAMPLE}.cpp " ---> bin: " ${EXAMPLE})
add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
target_link_libraries(${EXAMPLE} migraphx::c)
# Running ONNX model with dynamic batch
## Description
This example demonstrates how to run a graph program with dynamic batch sizes using the MIGraphX C++ API.
## Creating dynamic dimension objects
`dynamic_dimension` objects are used in MIGraphX to specify a range of dimension values, from a minimum to a maximum, along with optimal values that the tensor can take at model evaluation time.
A dynamic shape is defined by a list of `dynamic_dimension` objects, while a static shape has only fixed dimension values.
For example, a `dynamic_dimension` with `{min:1, max:10, optimals:{1, 4, 10}}` means that the dimension can be any value from 1 through 10 with the optimal values being 1, 4, and 10.
Supplied optimal values may allow MIGraphX to optimize the program for those specific shapes.
A fixed `dynamic_dimension` can be specified by setting the `min` and `max` to the same value (e.g. `{min:3, max:3}`).
A dynamic shape specified solely by fixed `dynamic_dimension` objects will be converted to a static shape during parsing.
This can be useful for setting a static shape using the `set_dyn_input_parameter_shape()` method discussed later in this document.
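The conversion described above can be sketched in a few lines of plain Python (the helper name is hypothetical, not the MIGraphX API): a shape whose dynamic dimensions are all fixed carries exactly one concrete size per dimension, so it collapses to a static shape.

```python
# Sketch only: a dynamic shape modeled as a list of (min, max) pairs.
# When every pair is fixed (min == max), MIGraphX can convert the shape
# to a static one during parsing; this mimics that rule.
def to_static(dyn_dims):
    if all(lo == hi for lo, hi in dyn_dims):
        return [lo for lo, _ in dyn_dims]  # one concrete size per dim
    return None  # at least one real range remains: still dynamic

print(to_static([(3, 3), (224, 224)]))  # all fixed -> static [3, 224]
print(to_static([(1, 4), (3, 3)]))      # batch ranges 1..4 -> None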
## Parsing
[ONNX](https://onnx.ai/get-started.html) graphs can be parsed by MIGraphX to create a runnable program with dynamic batch sizes.
The dynamic batch range must be specified by a `dynamic_dimension` object.
One method to set the `dynamic_dimension` object works for ONNX files that only have symbolic variables for the batch dimensions:
```
migraphx::program p;
migraphx::onnx_options options;
options.set_default_dyn_dim_value(migraphx::dynamic_dimension{1, 4, {2, 4}});
p = parse_onnx(input_file, options);
```
Another option that can run any ONNX model with dynamic batch sizes uses the dynamic input map where the entire shape of the input parameter is supplied:
```
migraphx::program p;
migraphx::onnx_options options;
migraphx::dynamic_dimensions dyn_dims = {migraphx::dynamic_dimension{1, 4, {2, 4}},
migraphx::dynamic_dimension{3, 3},
migraphx::dynamic_dimension{4, 4},
migraphx::dynamic_dimension{4, 4}};
options.set_dyn_input_parameter_shape("input", dyn_dims);
p = parse_onnx(input_file, options);
```
## Compiling
Currently the MIGraphX C/C++ API requires that `offload_copy` be enabled for compiling dynamic batch programs.
Here is a snippet of compiling a model with `offload_copy` enabled:
```
migraphx::compile_options c_options;
c_options.set_offload_copy();
p.compile(migraphx::target("gpu"), c_options);
```
where `p` is the `migraphx::program`.
## Saving and Loading
A dynamic batch MIGraphX program can be saved to and loaded from an MXR file the same way as a fully static shape program.
## Executing the dynamic batch model
The compiled dynamic batch model can be executed the same way as a static model by supplying the input data as `arguments` in a `program_parameters` object.
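A minimal sketch of the runtime contract, assuming a batch dimension compiled with `{min:1, max:4}` (the helper name is hypothetical): the supplied input's batch must fall inside the compiled range, and the number of elements to place in the input `argument` follows from the concrete shape chosen at evaluation time.

```python
# Sketch, not the MIGraphX API: validate a concrete input shape against
# the compiled dynamic batch range and compute its element count.
def validate_input(shape, dyn_range):
    lo, hi = dyn_range
    batch = shape[0]
    if not lo <= batch <= hi:
        raise ValueError(f"batch {batch} outside compiled range [{lo}, {hi}]")
    n = 1
    for d in shape:
        n *= d
    return n  # elements the input buffer must supply

print(validate_input((2, 3, 4, 5), (1, 4)))  # batch 2 is in range
```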
## Running the Example
Your ROCm installation may be in a location other than the one specified in the CMakeLists.txt.
If so, set `CMAKE_PREFIX_PATH` (and `LD_LIBRARY_PATH` at runtime) to that location so that this program can still build.
The provided example is [`dynamic_batch.cpp`](./dynamic_batch.cpp).
To compile and run the example from this directory:
```
$ mkdir build
$ cd build
$ cmake ..
$ make
```
There will now be an executable named `dynamic_batch` with the following usage:
```
$ ./dynamic_batch
```
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <algorithm>
// MIGraphX C++ API
#include <migraphx/migraphx.hpp>
int main(int argc, char** argv)
{
migraphx::onnx_options o_options;
migraphx::dynamic_dimensions dyn_dims = {migraphx::dynamic_dimension{1, 4, {2, 4}},
migraphx::dynamic_dimension{3, 3},
migraphx::dynamic_dimension{4, 4},
migraphx::dynamic_dimension{5, 5}};
o_options.set_dyn_input_parameter_shape("0", dyn_dims);
auto p = migraphx::parse_onnx("../add_scalar_test.onnx", o_options);
migraphx::compile_options c_options;
c_options.set_offload_copy();
p.compile(migraphx::target("gpu"), c_options);
// batch size = 2
std::vector<uint8_t> a(2 * 3 * 4 * 5, 3);
std::vector<uint8_t> b = {2};
migraphx::program_parameters pp;
migraphx::shape s = migraphx::shape(migraphx_shape_uint8_type, {2, 3, 4, 5});
pp.add("0", migraphx::argument(s, a.data()));
pp.add("1", migraphx::argument(migraphx::shape(migraphx_shape_uint8_type, {1}, {0}), b.data()));
auto outputs = p.eval(pp);
auto result = outputs[0];
std::vector<uint8_t> c(2 * 3 * 4 * 5, 5);
if(bool{result == migraphx::argument(s, c.data())})
{
std::cout << "Successfully executed dynamic batch add\n";
}
else
{
std::cout << "Failed dynamic batch add\n";
}
return 0;
}
@@ -27,7 +27,7 @@ project (PLS)
set (CMAKE_CXX_STANDARD 14)
set (EXAMPLE parse_load_save)
list (APPEND CMAKE_PREFIX_PATH /opt/rocm)
find_package (migraphx)
message("source file: " ${EXAMPLE}.cpp " ---> bin: " ${EXAMPLE})
...
@@ -27,7 +27,7 @@ project (custom_miopen_kernel)
set (CMAKE_CXX_STANDARD 14)
set (EXAMPLE custom_op_miopen_kernel)
list (APPEND CMAKE_PREFIX_PATH /opt/rocm)
find_package (migraphx REQUIRED)
find_package (miopen REQUIRED)
...
@@ -28,7 +28,7 @@ set (CMAKE_CXX_STANDARD 14)
set (EXAMPLE custom_op_rocblas_kernel)
list (APPEND CMAKE_PREFIX_PATH /opt/rocm)
find_package (migraphx REQUIRED)
find_package (rocblas REQUIRED)
...
@@ -10,52 +10,56 @@ The MIGraphX driver is installed with MIGraphX and can be found in `/opt/rocm/bi
See below for a comprehensive list of commands and option arguments, as well as some usage examples.
### Commands
| Command | Description |
| ------- | -------------------------------------------------------------------------- |
| op      | When followed by the option --list or -l, prints all operators of MIGraphX |
| params  | Prints the input and output parameter shapes |
| run     | Compiles, allocates parameters, evaluates, and prints input graph |
| read    | Loads and prints input graph |
| compile | Compiles and prints input graph |
| verify  | Runs reference and GPU implementations and checks outputs for consistency |
| perf    | Compiles and runs input graph then prints performance report |
### Options
| Option | Description |
| ---------------------------------------- | --------------------------------------------------------- |
| --help \| -h | Show help |
| --model <resnet50\|inceptionv3\|alexnet> | Loads one of the three default models |
| --onnx | Load file as onnx graph |
| --tf | Load file as a tensorflow graph |
| --migraphx | Load file as a migraphx graph |
| --migraphx-json | Load file as a migraphx JSON graph |
| --batch | For a static model, sets the batch size; for a dynamic batch model, sets the batch size used at runtime |
| --nhwc | Treat tensorflow format as nhwc |
| --nchw | Treat tensorflow format as nchw |
| --skip-unknown-operators | Skip unknown operators when parsing and continue to parse |
| --trim \| -t | Trim instructions from the end |
| --optimize \| -O | Optimize when reading |
| --graphviz \| -g | Print out a graphviz representation |
| --brief | Make the output brief |
| --cpp | Print out the program as cpp program |
| --json | Print out program as json |
| --text | Print out program in text format |
| --binary | Print out program in binary format |
| --output \| -o | Output to file |
| --fill0 | Fill parameter with 0s |
| --fill1 | Fill parameter with 1s |
| --input-dim | Set static dimensions of a parameter |
| --dyn-input-dim | Set dynamic dimensions of a parameter |
| --default-dyn-dim | Set default dynamic dimension |
| --gpu | Compile on the gpu |
| --cpu | Compile on the cpu |
| --ref | Compile on the reference implementation |
| --enable-offload-copy | Enable implicit offload copying |
| --disable-fast-math | Disable fast math optimization |
| --exhaustive-tune | Enable exhaustive search to find fastest kernel |
| --fp16 | Quantize for fp16 |
| --int8 | Quantize for int8 |
| --tolerance | Tolerance for errors |
| --per-instruction \| -i | Verify each instruction |
| --reduce \| -r | Reduce program and verify |
| --iterations \| -n | Number of iterations to run for perf report |
| --list \| -l | List all the operators of MIGraphX |
## Usage Examples
The examples below supply a simple MNIST ConvNet as the input graph. Models of higher complexity will have considerably larger outputs in most cases.
@@ -87,7 +91,7 @@ batch_norm_inference
broadcast
capture
ceil
check_context::migraphx::gpu::context
clip
concat
contiguous
@@ -303,7 +307,7 @@ $ /opt/rocm/bin/migraphx-driver run --onnx simple_graph.onnx
```
Compiling ...
Reading: simple_graph.onnx
@0 = check_context::migraphx::gpu::context -> float_type, {}, {}
@1 = hip::hip_allocate_memory[shape=float_type, {256}, {1},id=scratch] -> float_type, {256}, {1}
@2 = hip::hip_copy_literal[id=@literal:1] -> float_type, {784, 128}, {128, 1}
x:0 = @param:x:0 -> float_type, {1, 28, 28}, {784, 28, 1}
@@ -326,7 +330,7 @@ x:0 = @param:x:0 -> float_type, {1, 28, 28}, {784, 28, 1}
@18 = @return(@17)
Allocating params ...
@0 = check_context::migraphx::gpu::context -> float_type, {}, {}
@1 = hip::hip_allocate_memory[shape=float_type, {256}, {1},id=scratch] -> float_type, {256}, {1}
@2 = hip::hip_copy_literal[id=@literal:1] -> float_type, {784, 128}, {128, 1}
x:0 = @param:x:0 -> float_type, {1, 28, 28}, {784, 28, 1}
@@ -398,7 +402,7 @@ $ /opt/rocm/bin/migraphx-driver compile --gpu --fp16 simple_graph.pb
```
Compiling ...
Reading: simple_graph.pb
@0 = check_context::migraphx::gpu::context -> float_type, {}, {}
@1 = hip::hip_allocate_memory[shape=float_type, {456}, {1},id=scratch] -> float_type, {456}, {1}
@2 = hip::hip_copy_literal[id=@literal:0] -> half_type, {784, 128}, {128, 1}
@3 = load[offset=256,end=1824](@1) -> half_type, {1, 28, 28}, {784, 28, 1}
@@ -501,7 +505,7 @@ x = @param:x -> float_type, {1, 28, 28}, {784, 28, 1}
@18 = ref::softmax[axis=1](@17) -> float_type, {1, 10}, {10, 1}
@19 = ref::identity(@18) -> float_type, {1, 10}, {10, 1}
@0 = check_context::migraphx::gpu::context -> float_type, {}, {}
@1 = hip::hip_allocate_memory[shape=float_type, {256}, {1},id=scratch] -> float_type, {256}, {1}
@2 = hip::hip_copy_literal[id=@literal:3] -> float_type, {784, 128}, {128, 1}
x = @param:x -> float_type, {1, 28, 28}, {784, 28, 1}
@@ -537,7 +541,7 @@ $ /opt/rocm/bin/migraphx-driver perf simple_graph.pb
```
Compiling ...
Reading: simple_graph.pb
@0 = check_context::migraphx::gpu::context -> float_type, {}, {}
@1 = hip::hip_allocate_memory[shape=float_type, {256}, {1},id=scratch] -> float_type, {256}, {1}
@2 = hip::hip_copy_literal[id=@literal:3] -> float_type, {784, 128}, {128, 1}
@3 = load[offset=0,end=512](@1) -> float_type, {1, 128}, {128, 1}
@@ -560,7 +564,7 @@ output = @param:output -> float_type, {1, 10}, {10, 1}
Allocating params ...
Running performance report ...
@0 = check_context::migraphx::gpu::context -> float_type, {}, {}: 0.00057782ms, 1%
@1 = hip::hip_allocate_memory[shape=float_type, {256}, {1},id=scratch] -> float_type, {256}, {1}: 0.000295ms, 1%
@2 = hip::hip_copy_literal[id=@literal:3] -> float_type, {784, 128}, {128, 1}: 0.00027942ms, 1%
@3 = load[offset=0,end=512](@1) -> float_type, {1, 128}, {128, 1}: 0.000232ms, 1%
@@ -590,7 +594,7 @@ hip::hip_copy_literal: 0.00186824ms, 1%
load: 0.0016288ms, 1%
@param: 0.0013428ms, 1%
broadcast: 0.00118042ms, 1%
check_context::migraphx::gpu::context: 0.00057782ms, 1%
reshape: 0.00033842ms, 1%
hip::hip_allocate_memory: 0.000295ms, 1%
...
@@ -21,6 +21,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
tensorflow==2.11.1
onnxruntime
tokenizers
\ No newline at end of file
@@ -27,7 +27,7 @@ project (CAI)
set (CMAKE_CXX_STANDARD 14)
set (EXAMPLE mnist_inference)
list (APPEND CMAKE_PREFIX_PATH /opt/rocm)
find_package (migraphx)
message("source file: " ${EXAMPLE}.cpp " ---> bin: " ${EXAMPLE})
...
@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
RUN dpkg --add-architecture i386
# Add rocm repository
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.5/ focal main > /etc/apt/sources.list.d/rocm.list'
# Install dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
@@ -47,7 +47,7 @@ ENV LANG=C.UTF-8
RUN pip3 install yapf==0.28.0
# Install doc requirements
ADD docs/.sphinx/requirements.txt /doc-requirements.txt
RUN pip3 install -r /doc-requirements.txt
# Install dependencies
@@ -57,4 +57,3 @@ ADD rbuild.ini /rbuild.ini
COPY ./tools/install_prereqs.sh /
RUN /install_prereqs.sh /usr/local / && rm /install_prereqs.sh
@@ -14,6 +14,7 @@ define =
CMAKE_C_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
MIGRAPHX_ENABLE_CPU=On
BUILD_DEV=On
[develop]
cxx = ${rocm_path}/llvm/bin/clang++
@@ -25,3 +26,4 @@ define =
CMAKE_C_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
MIGRAPHX_ENABLE_CPU=On
BUILD_DEV=On
@@ -24,7 +24,7 @@
google/protobuf@v3.11.0 -DCMAKE_POSITION_INDEPENDENT_CODE=On -X subdir -Dprotobuf_BUILD_TESTS=Off
nlohmann/json@v3.8.0
live-clones/blaze@v3.8 -X header -DHEADER_DIR=blaze -H sha256:d0ff011f47538285178908ea5f2cab46bb6a8f55b1edb6e03224a82dbc1a3212
ROCmSoftwarePlatform/half@rocm-5.4.2
pybind/pybind11@d159a563383d10c821ba7b2a71905d1207db6de4 --build
msgpack/msgpack-c@cpp-3.3.0 -DMSGPACK_BUILD_TESTS=Off
sqlite3@3.17 -DCMAKE_POSITION_INDEPENDENT_CODE=On
...