Commit 14329696 authored by turneram's avatar turneram
Browse files

Merge remote-tracking branch 'origin/develop' into rewrite-fast-gelu

parents a48c41a9 79e15ca9
...@@ -53,6 +53,7 @@ jobs: ...@@ -53,6 +53,7 @@ jobs:
CXX=/opt/rocm/llvm/bin/clang++ CC=/opt/rocm/llvm/bin/clang cmake \ CXX=/opt/rocm/llvm/bin/clang++ CC=/opt/rocm/llvm/bin/clang cmake \
-DMIGRAPHX_ENABLE_GPU=On \ -DMIGRAPHX_ENABLE_GPU=On \
-DMIGRAPHX_ENABLE_CPU=On \ -DMIGRAPHX_ENABLE_CPU=On \
-DMIGRAPHX_ENABLE_FPGA=On \
-DROCM_ENABLE_GH_ANNOTATIONS=On \ -DROCM_ENABLE_GH_ANNOTATIONS=On \
-DCLANG_TIDY_DEPEND_ON_TARGET=Off \ -DCLANG_TIDY_DEPEND_ON_TARGET=Off \
-DCLANG_TIDY_CACHE=/data/tidy-cache \ -DCLANG_TIDY_CACHE=/data/tidy-cache \
......
name: MIGraphX Performance Tests name: MIGraphX Performance Tests
on: on:
push:
branches: [develop]
pull_request: pull_request:
branches: [develop] branches: [develop]
types: [opened, synchronize, closed]
schedule: schedule:
- cron: "0 5 * * 1-6" - cron: "0 5 * * 1-6"
......
...@@ -91,7 +91,6 @@ add_library(migraphx ...@@ -91,7 +91,6 @@ add_library(migraphx
shape.cpp shape.cpp
simplify_algebra.cpp simplify_algebra.cpp
simplify_reshapes.cpp simplify_reshapes.cpp
target_assignments.cpp
tmp_dir.cpp tmp_dir.cpp
value.cpp value.cpp
verify_args.cpp verify_args.cpp
......
...@@ -25,13 +25,10 @@ ...@@ -25,13 +25,10 @@
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <migraphx/generate.hpp> #include <migraphx/generate.hpp>
#include <migraphx/json.hpp>
#include "models.hpp" #include "models.hpp"
namespace migraphx { namespace migraphx {
namespace driver { namespace driver {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
migraphx::program alexnet(unsigned batch) // NOLINT(readability-function-size) migraphx::program alexnet(unsigned batch) // NOLINT(readability-function-size)
{ {
migraphx::program p; migraphx::program p;
...@@ -42,179 +39,153 @@ migraphx::program alexnet(unsigned batch) // NOLINT(readability-function-size) ...@@ -42,179 +39,153 @@ migraphx::program alexnet(unsigned batch) // NOLINT(readability-function-size)
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1}}, 1))); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1}}, 1)));
auto x_main_module_2 = mmain->add_literal(migraphx::abs( auto x_main_module_2 = mmain->add_literal(migraphx::abs(
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1}}, 2))); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1}}, 2)));
auto x_input_1 = mmain->add_parameter( auto x_0 = mmain->add_parameter(
"input.1", migraphx::shape{migraphx::shape::float_type, {batch, 3, 224, 224}}); "0", migraphx::shape{migraphx::shape::float_type, {batch, 3, 224, 224}});
auto x_main_module_4 = mmain->add_literal( auto x_main_module_4 = mmain->add_literal(
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 4096}}, 3)); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000}}, 3));
auto x_main_module_5 = mmain->add_literal( auto x_main_module_5 = mmain->add_literal(
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 4)); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000, 4096}}, 4));
auto x_main_module_6 = mmain->add_literal( auto x_main_module_6 = mmain->add_literal(
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 9216}}, 5)); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 5));
auto x_main_module_7 = mmain->add_literal( auto x_main_module_7 = mmain->add_literal(
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 6)); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 4096}}, 6));
auto x_main_module_8 = mmain->add_literal( auto x_main_module_8 = mmain->add_literal(
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000, 4096}}, 7)); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 7));
auto x_main_module_9 = mmain->add_literal( auto x_main_module_9 = mmain->add_literal(
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000}}, 8)); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 9216}}, 8));
auto x_main_module_10 = mmain->add_literal(migraphx::generate_literal( auto x_main_module_10 = mmain->add_literal(
migraphx::shape{migraphx::shape::float_type, {256, 384, 3, 3}}, 9)); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 9));
auto x_main_module_11 = mmain->add_literal( auto x_main_module_11 = mmain->add_literal(migraphx::generate_literal(
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 10)); migraphx::shape{migraphx::shape::float_type, {256, 256, 3, 3}}, 10));
auto x_main_module_12 = mmain->add_literal(migraphx::generate_literal( auto x_main_module_12 = mmain->add_literal(
migraphx::shape{migraphx::shape::float_type, {384, 192, 3, 3}}, 11)); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 11));
auto x_main_module_13 = mmain->add_literal( auto x_main_module_13 = mmain->add_literal(migraphx::generate_literal(
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {384}}, 12)); migraphx::shape{migraphx::shape::float_type, {256, 384, 3, 3}}, 12));
auto x_main_module_14 = mmain->add_literal(migraphx::generate_literal( auto x_main_module_14 = mmain->add_literal(
migraphx::shape{migraphx::shape::float_type, {192, 64, 5, 5}}, 13)); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {384}}, 13));
auto x_main_module_15 = mmain->add_literal( auto x_main_module_15 = mmain->add_literal(migraphx::generate_literal(
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {192}}, 14)); migraphx::shape{migraphx::shape::float_type, {384, 192, 3, 3}}, 14));
auto x_main_module_16 = mmain->add_literal(migraphx::generate_literal( auto x_main_module_16 = mmain->add_literal(
migraphx::shape{migraphx::shape::float_type, {256, 256, 3, 3}}, 15)); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {192}}, 15));
auto x_main_module_17 = mmain->add_literal( auto x_main_module_17 = mmain->add_literal(migraphx::generate_literal(
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 16)); migraphx::shape{migraphx::shape::float_type, {192, 64, 5, 5}}, 16));
auto x_main_module_18 = mmain->add_literal(migraphx::generate_literal( auto x_main_module_18 = mmain->add_literal(
migraphx::shape{migraphx::shape::float_type, {64, 3, 11, 11}}, 17)); migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {64}}, 17));
auto x_main_module_19 = mmain->add_literal( auto x_main_module_19 = mmain->add_literal(migraphx::generate_literal(
migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {64}}, 18)); migraphx::shape{migraphx::shape::float_type, {64, 3, 11, 11}}, 18));
auto x_main_module_20 = mmain->add_instruction( auto x_main_module_20 = mmain->add_instruction(
migraphx::make_op( migraphx::make_json_op("convolution",
"convolution", "{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[4,"
migraphx::from_json_string( "4],use_dynamic_same_auto_pad:0}"),
"{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[4,4]}")), x_0,
x_input_1,
x_main_module_18);
auto x_main_module_21 = mmain->add_instruction(
migraphx::make_op("broadcast",
migraphx::from_json_string("{axis:1,out_lens:[1,64,55,55]}")),
x_main_module_19); x_main_module_19);
auto x_main_module_21 = mmain->add_instruction(
migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,64,55,55]}"), x_main_module_18);
auto x_main_module_22 = auto x_main_module_22 =
mmain->add_instruction(migraphx::make_op("add"), x_main_module_20, x_main_module_21); mmain->add_instruction(migraphx::make_op("add"), x_main_module_20, x_main_module_21);
auto x_main_module_23 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_22); auto x_main_module_23 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_22);
auto x_main_module_24 = mmain->add_instruction( auto x_main_module_24 = mmain->add_instruction(
migraphx::make_op( migraphx::make_json_op(
"pooling", "pooling",
migraphx::from_json_string( "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}"),
"{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}")),
x_main_module_23); x_main_module_23);
auto x_main_module_25 = mmain->add_instruction( auto x_main_module_25 = mmain->add_instruction(
migraphx::make_op( migraphx::make_json_op("convolution",
"convolution", "{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[1,"
migraphx::from_json_string( "1],use_dynamic_same_auto_pad:0}"),
"{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[1,1]}")),
x_main_module_24, x_main_module_24,
x_main_module_14); x_main_module_17);
auto x_main_module_26 = mmain->add_instruction( auto x_main_module_26 = mmain->add_instruction(
migraphx::make_op("broadcast", migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,192,27,27]}"), x_main_module_16);
migraphx::from_json_string("{axis:1,out_lens:[1,192,27,27]}")),
x_main_module_15);
auto x_main_module_27 = auto x_main_module_27 =
mmain->add_instruction(migraphx::make_op("add"), x_main_module_25, x_main_module_26); mmain->add_instruction(migraphx::make_op("add"), x_main_module_25, x_main_module_26);
auto x_main_module_28 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_27); auto x_main_module_28 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_27);
auto x_main_module_29 = mmain->add_instruction( auto x_main_module_29 = mmain->add_instruction(
migraphx::make_op( migraphx::make_json_op(
"pooling", "pooling",
migraphx::from_json_string( "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}"),
"{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}")),
x_main_module_28); x_main_module_28);
auto x_main_module_30 = mmain->add_instruction( auto x_main_module_30 = mmain->add_instruction(
migraphx::make_op( migraphx::make_json_op("convolution",
"convolution", "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,"
migraphx::from_json_string( "1],use_dynamic_same_auto_pad:0}"),
"{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}")),
x_main_module_29, x_main_module_29,
x_main_module_12); x_main_module_15);
auto x_main_module_31 = mmain->add_instruction( auto x_main_module_31 = mmain->add_instruction(
migraphx::make_op("broadcast", migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,384,13,13]}"), x_main_module_14);
migraphx::from_json_string("{axis:1,out_lens:[1,384,13,13]}")),
x_main_module_13);
auto x_main_module_32 = auto x_main_module_32 =
mmain->add_instruction(migraphx::make_op("add"), x_main_module_30, x_main_module_31); mmain->add_instruction(migraphx::make_op("add"), x_main_module_30, x_main_module_31);
auto x_main_module_33 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_32); auto x_main_module_33 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_32);
auto x_main_module_34 = mmain->add_instruction( auto x_main_module_34 = mmain->add_instruction(
migraphx::make_op( migraphx::make_json_op("convolution",
"convolution", "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,"
migraphx::from_json_string( "1],use_dynamic_same_auto_pad:0}"),
"{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}")),
x_main_module_33, x_main_module_33,
x_main_module_10); x_main_module_13);
auto x_main_module_35 = mmain->add_instruction( auto x_main_module_35 = mmain->add_instruction(
migraphx::make_op("broadcast", migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,256,13,13]}"), x_main_module_12);
migraphx::from_json_string("{axis:1,out_lens:[1,256,13,13]}")),
x_main_module_11);
auto x_main_module_36 = auto x_main_module_36 =
mmain->add_instruction(migraphx::make_op("add"), x_main_module_34, x_main_module_35); mmain->add_instruction(migraphx::make_op("add"), x_main_module_34, x_main_module_35);
auto x_main_module_37 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_36); auto x_main_module_37 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_36);
auto x_main_module_38 = mmain->add_instruction( auto x_main_module_38 = mmain->add_instruction(
migraphx::make_op( migraphx::make_json_op("convolution",
"convolution", "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,"
migraphx::from_json_string( "1],use_dynamic_same_auto_pad:0}"),
"{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}")),
x_main_module_37, x_main_module_37,
x_main_module_16); x_main_module_11);
auto x_main_module_39 = mmain->add_instruction( auto x_main_module_39 = mmain->add_instruction(
migraphx::make_op("broadcast", migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,256,13,13]}"), x_main_module_10);
migraphx::from_json_string("{axis:1,out_lens:[1,256,13,13]}")),
x_main_module_17);
auto x_main_module_40 = auto x_main_module_40 =
mmain->add_instruction(migraphx::make_op("add"), x_main_module_38, x_main_module_39); mmain->add_instruction(migraphx::make_op("add"), x_main_module_38, x_main_module_39);
auto x_main_module_41 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_40); auto x_main_module_41 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_40);
auto x_main_module_42 = mmain->add_instruction( auto x_main_module_42 = mmain->add_instruction(
migraphx::make_op( migraphx::make_json_op(
"pooling", "pooling",
migraphx::from_json_string( "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}"),
"{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}")),
x_main_module_41); x_main_module_41);
auto x_main_module_43 = mmain->add_instruction( auto x_main_module_43 =
migraphx::make_op("reshape", migraphx::from_json_string("{dims:[1,9216]}")), mmain->add_instruction(migraphx::make_json_op("flatten", "{axis:1}"), x_main_module_42);
x_main_module_42); auto x_main_module_44 = mmain->add_instruction(migraphx::make_op("identity"), x_main_module_43);
auto x_main_module_44 = mmain->add_instruction( auto x_main_module_45 = mmain->add_instruction(
migraphx::make_op("transpose", migraphx::from_json_string("{permutation:[1,0]}")), migraphx::make_json_op("transpose", "{permutation:[1,0]}"), x_main_module_9);
x_main_module_6); auto x_main_module_46 =
auto x_main_module_45 = mmain->add_instruction(migraphx::make_op("dot"), x_main_module_44, x_main_module_45);
mmain->add_instruction(migraphx::make_op("dot"), x_main_module_43, x_main_module_44);
auto x_main_module_46 = mmain->add_instruction(
migraphx::make_op("multibroadcast", migraphx::from_json_string("{out_lens:[1,4096]}")),
x_main_module_7);
auto x_main_module_47 = mmain->add_instruction( auto x_main_module_47 = mmain->add_instruction(
migraphx::make_op("multibroadcast", migraphx::from_json_string("{out_lens:[1,4096]}")), migraphx::make_json_op("multibroadcast", "{out_lens:[1,4096]}"), x_main_module_8);
x_main_module_2); auto x_main_module_48 = mmain->add_instruction(
auto x_main_module_48 = migraphx::make_json_op("multibroadcast", "{out_lens:[1,4096]}"), x_main_module_2);
mmain->add_instruction(migraphx::make_op("mul"), x_main_module_46, x_main_module_47);
auto x_main_module_49 = auto x_main_module_49 =
mmain->add_instruction(migraphx::make_op("add"), x_main_module_45, x_main_module_48); mmain->add_instruction(migraphx::make_op("mul"), x_main_module_47, x_main_module_48);
auto x_main_module_50 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_49); auto x_main_module_50 =
auto x_main_module_51 = mmain->add_instruction( mmain->add_instruction(migraphx::make_op("add"), x_main_module_46, x_main_module_49);
migraphx::make_op("transpose", migraphx::from_json_string("{permutation:[1,0]}")), auto x_main_module_51 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_50);
x_main_module_4); auto x_main_module_52 = mmain->add_instruction(migraphx::make_op("identity"), x_main_module_51);
auto x_main_module_52 =
mmain->add_instruction(migraphx::make_op("dot"), x_main_module_50, x_main_module_51);
auto x_main_module_53 = mmain->add_instruction( auto x_main_module_53 = mmain->add_instruction(
migraphx::make_op("multibroadcast", migraphx::from_json_string("{out_lens:[1,4096]}")), migraphx::make_json_op("transpose", "{permutation:[1,0]}"), x_main_module_7);
x_main_module_5); auto x_main_module_54 =
auto x_main_module_54 = mmain->add_instruction( mmain->add_instruction(migraphx::make_op("dot"), x_main_module_52, x_main_module_53);
migraphx::make_op("multibroadcast", migraphx::from_json_string("{out_lens:[1,4096]}")), auto x_main_module_55 = mmain->add_instruction(
x_main_module_1); migraphx::make_json_op("multibroadcast", "{out_lens:[1,4096]}"), x_main_module_6);
auto x_main_module_55 = auto x_main_module_56 = mmain->add_instruction(
mmain->add_instruction(migraphx::make_op("mul"), x_main_module_53, x_main_module_54); migraphx::make_json_op("multibroadcast", "{out_lens:[1,4096]}"), x_main_module_1);
auto x_main_module_56 = auto x_main_module_57 =
mmain->add_instruction(migraphx::make_op("add"), x_main_module_52, x_main_module_55); mmain->add_instruction(migraphx::make_op("mul"), x_main_module_55, x_main_module_56);
auto x_main_module_57 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_56); auto x_main_module_58 =
auto x_main_module_58 = mmain->add_instruction( mmain->add_instruction(migraphx::make_op("add"), x_main_module_54, x_main_module_57);
migraphx::make_op("transpose", migraphx::from_json_string("{permutation:[1,0]}")), auto x_main_module_59 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_58);
x_main_module_8);
auto x_main_module_59 =
mmain->add_instruction(migraphx::make_op("dot"), x_main_module_57, x_main_module_58);
auto x_main_module_60 = mmain->add_instruction( auto x_main_module_60 = mmain->add_instruction(
migraphx::make_op("multibroadcast", migraphx::from_json_string("{out_lens:[1,1000]}")), migraphx::make_json_op("transpose", "{permutation:[1,0]}"), x_main_module_5);
x_main_module_9); auto x_main_module_61 =
auto x_main_module_61 = mmain->add_instruction( mmain->add_instruction(migraphx::make_op("dot"), x_main_module_59, x_main_module_60);
migraphx::make_op("multibroadcast", migraphx::from_json_string("{out_lens:[1,1000]}")), auto x_main_module_62 = mmain->add_instruction(
x_main_module_0); migraphx::make_json_op("multibroadcast", "{out_lens:[1,1000]}"), x_main_module_4);
auto x_main_module_62 = auto x_main_module_63 = mmain->add_instruction(
mmain->add_instruction(migraphx::make_op("mul"), x_main_module_60, x_main_module_61); migraphx::make_json_op("multibroadcast", "{out_lens:[1,1000]}"), x_main_module_0);
auto x_main_module_63 = auto x_main_module_64 =
mmain->add_instruction(migraphx::make_op("add"), x_main_module_59, x_main_module_62); mmain->add_instruction(migraphx::make_op("mul"), x_main_module_62, x_main_module_63);
mmain->add_return({x_main_module_63}); auto x_main_module_65 =
mmain->add_instruction(migraphx::make_op("add"), x_main_module_61, x_main_module_64);
mmain->add_return({x_main_module_65});
return p; return p;
} }
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
...@@ -27,6 +27,8 @@ ...@@ -27,6 +27,8 @@
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <migraphx/operation.hpp> #include <migraphx/operation.hpp>
#include <migraphx/value.hpp> #include <migraphx/value.hpp>
#include <migraphx/json.hpp>
#include <migraphx/convert_to_json.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -46,6 +48,8 @@ operation make_op(const std::string& name, const Value& v) ...@@ -46,6 +48,8 @@ operation make_op(const std::string& name, const Value& v)
return make_op_from_value(name, v); return make_op_from_value(name, v);
} }
/**
 * @brief Create an operation from its name and a textual attribute description
 * @param name Name of the operation to create
 * @param s Attribute text; it is passed through convert_to_json and then
 * parsed with from_json_string before the operation is built
 * @return The operation produced by make_op for the parsed attributes
 */
operation make_json_op(const std::string& name, const std::string& s);
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
...@@ -21,16 +21,24 @@ ...@@ -21,16 +21,24 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#ifndef MIGRAPHX_GUARD_MIGRAPHX_SUPPORTED_SEGMENTS_HPP
#define MIGRAPHX_GUARD_MIGRAPHX_SUPPORTED_SEGMENTS_HPP
#include <migraphx/target_assignments.hpp> #include <unordered_set>
#include <migraphx/instruction_ref.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
void target_assignments::add_assignment(instruction_ref ins, const std::string& target) struct supported_segment
{ {
assignments.emplace(ins, target); std::unordered_set<instruction_ref> instructions;
} float metric;
};
using supported_segments = std::vector<supported_segment>;
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
#endif // MIGRAPHX_GUARD_MIGRAPHX_SUPPORTED_SEGMENTS_HPP
...@@ -37,8 +37,10 @@ ...@@ -37,8 +37,10 @@
#include <migraphx/compile_options.hpp> #include <migraphx/compile_options.hpp>
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
#include <migraphx/rank.hpp> #include <migraphx/rank.hpp>
#include <migraphx/module_ref.hpp>
#include <migraphx/support_metric.hpp> #include <migraphx/support_metric.hpp>
#include <migraphx/instruction_ref.hpp> #include <migraphx/instruction_ref.hpp>
#include <migraphx/supported_segments.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -64,12 +66,12 @@ struct target ...@@ -64,12 +66,12 @@ struct target
*/ */
context get_context() const; context get_context() const;
/** /**
* @brief Check how well an instruction is supported on a target with the given metric * @brief Get the ranges of instructions that are supported on a target
* @param ins Instruction to check if it's supported * @param module Module to check for supported instructions
* @param metric Used to define how the return value should be interpreted * @param metric Used to define how the quality of the support should be measured
* @return The value based on the chosen metric. Negative numbers mean unsupported * @return the supported segments of the graph
*/ */
float is_supported(T&, instruction_ref ins, support_metric m) const; supported_segments target_is_supported(T&, const_module_ref mod, support_metric metric) const;
/** /**
* @brief copy an argument to the current target. * @brief copy an argument to the current target.
* *
...@@ -115,9 +117,9 @@ argument copy_from_target(T&, const argument& arg) ...@@ -115,9 +117,9 @@ argument copy_from_target(T&, const argument& arg)
} }
template <class T> template <class T>
float target_is_supported(T&, instruction_ref, support_metric) supported_segments target_find_supported(T&, const_module_ref, support_metric)
{ {
return 0; return {};
} }
#ifdef TYPE_ERASED_DECLARATION #ifdef TYPE_ERASED_DECLARATION
...@@ -132,7 +134,7 @@ struct target ...@@ -132,7 +134,7 @@ struct target
// //
context get_context() const; context get_context() const;
// (optional) // (optional)
float is_supported(instruction_ref ins, support_metric m) const; supported_segments find_supported(const_module_ref mod, support_metric m) const;
// (optional) // (optional)
argument copy_to(const argument& input) const; argument copy_to(const argument& input) const;
// (optional) // (optional)
...@@ -224,10 +226,10 @@ struct target ...@@ -224,10 +226,10 @@ struct target
return (*this).private_detail_te_get_handle().get_context(); return (*this).private_detail_te_get_handle().get_context();
} }
float is_supported(instruction_ref ins, support_metric m) const supported_segments find_supported(const_module_ref mod, support_metric m) const
{ {
assert((*this).private_detail_te_handle_mem_var); assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().is_supported(ins, m); return (*this).private_detail_te_get_handle().find_supported(mod, m);
} }
argument copy_to(const argument& input) const argument copy_to(const argument& input) const
...@@ -265,29 +267,29 @@ struct target ...@@ -265,29 +267,29 @@ struct target
virtual std::vector<pass> get_passes(context& ctx, virtual std::vector<pass> get_passes(context& ctx,
const compile_options& options) const = 0; const compile_options& options) const = 0;
virtual context get_context() const = 0; virtual context get_context() const = 0;
virtual float is_supported(instruction_ref ins, support_metric m) const = 0; virtual supported_segments find_supported(const_module_ref mod, support_metric m) const = 0;
virtual argument copy_to(const argument& input) const = 0; virtual argument copy_to(const argument& input) const = 0;
virtual argument copy_from(const argument& input) const = 0; virtual argument copy_from(const argument& input) const = 0;
virtual argument allocate(const shape& s) const = 0; virtual argument allocate(const shape& s) const = 0;
}; };
template <class T> template <class T>
static auto private_detail_te_default_is_supported(char, static auto private_detail_te_default_find_supported(char,
T&& private_detail_te_self, T&& private_detail_te_self,
instruction_ref ins, const_module_ref mod,
support_metric m) support_metric m)
-> decltype(private_detail_te_self.is_supported(ins, m)) -> decltype(private_detail_te_self.find_supported(mod, m))
{ {
return private_detail_te_self.is_supported(ins, m); return private_detail_te_self.find_supported(mod, m);
} }
template <class T> template <class T>
static float private_detail_te_default_is_supported(float, static supported_segments private_detail_te_default_find_supported(float,
T&& private_detail_te_self, T&& private_detail_te_self,
instruction_ref ins, const_module_ref mod,
support_metric m) support_metric m)
{ {
return target_is_supported(private_detail_te_self, ins, m); return target_find_supported(private_detail_te_self, mod, m);
} }
template <class T> template <class T>
...@@ -372,10 +374,11 @@ struct target ...@@ -372,10 +374,11 @@ struct target
context get_context() const override { return private_detail_te_value.get_context(); } context get_context() const override { return private_detail_te_value.get_context(); }
float is_supported(instruction_ref ins, support_metric m) const override supported_segments find_supported(const_module_ref mod, support_metric m) const override
{ {
return private_detail_te_default_is_supported(char(0), private_detail_te_value, ins, m); return private_detail_te_default_find_supported(
char(0), private_detail_te_value, mod, m);
} }
argument copy_to(const argument& input) const override argument copy_to(const argument& input) const override
......
...@@ -33,10 +33,20 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -33,10 +33,20 @@ inline namespace MIGRAPHX_INLINE_NS {
struct target_assignments struct target_assignments
{ {
void add_assignment(instruction_ref ins, const std::string& target); using iterator = std::unordered_map<instruction_ref, std::string>::const_iterator;
using value_type = std::pair<instruction_ref, std::string>;
auto begin() const { return assignments.cbegin(); } auto size() const { return assignments.size(); }
auto end() const { return assignments.cend(); } auto& at(instruction_ref ins) const { return assignments.at(ins); }
// Hinted insert: forwards the position hint and the (instruction, target name)
// pair straight to the underlying unordered_map and returns whatever the map's
// insert returns. Taking an iterator as the first argument is the signature
// std::inserter calls on its container.
auto insert(iterator it, const std::pair<instruction_ref, std::string>& assignment)
{
return assignments.insert(it, assignment);
}
// Look up the assignment recorded for `ins`; forwards to the map's find, so the
// result compares equal to end() when no assignment exists for that instruction.
auto find(instruction_ref ins) const { return assignments.find(ins); }
// Start of the read-only range over all recorded (instruction, target name) pairs.
auto begin() const { return assignments.begin(); }
// Past-the-end position of the read-only range over the recorded assignments.
auto end() const { return assignments.end(); }
private: private:
std::unordered_map<instruction_ref, std::string> assignments; std::unordered_map<instruction_ref, std::string> assignments;
......
...@@ -64,5 +64,10 @@ operation make_op_from_value(const std::string& name, const value& v) ...@@ -64,5 +64,10 @@ operation make_op_from_value(const std::string& name, const value& v)
}); });
} }
// Create the operation `name` from attributes supplied as text.
// The text is first run through convert_to_json, the result is parsed with
// from_json_string, and the parsed value is handed to make_op.
operation make_json_op(const std::string& name, const std::string& s)
{
    const auto json_text = convert_to_json(s);
    return make_op(name, from_json_string(json_text));
}
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
...@@ -788,12 +788,15 @@ static std::string cpp_var_name(const std::string& name) ...@@ -788,12 +788,15 @@ static std::string cpp_var_name(const std::string& name)
static void print_make_op(std::ostream& os, const operation& op) static void print_make_op(std::ostream& os, const operation& op)
{ {
os << "migraphx::make_op(" << enclose_name(op.name());
auto v = op.to_value(); auto v = op.to_value();
if(not v.empty()) if(not v.empty())
{ {
os << ", " os << "migraphx::make_json_op(" << enclose_name(op.name());
<< "migraphx::from_json_string(" << enclose_name(to_json_string(v)) << ")"; os << ", " << enclose_name(to_json_string(v));
}
else
{
os << "migraphx::make_op(" << enclose_name(op.name());
} }
os << ")"; os << ")";
} }
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include <migraphx/output_iterator.hpp> #include <migraphx/output_iterator.hpp>
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
#include <migraphx/marker.hpp> #include <migraphx/marker.hpp>
#include <migraphx/supported_segments.hpp>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <algorithm> #include <algorithm>
...@@ -167,13 +168,37 @@ target_assignments program::get_target_assignments(const std::vector<target>& ta ...@@ -167,13 +168,37 @@ target_assignments program::get_target_assignments(const std::vector<target>& ta
target_assignments p; target_assignments p;
const auto* mod = get_main_module(); const auto* mod = get_main_module();
for(auto it : iterator_for(*mod)) std::vector<std::pair<target, supported_segments>> target_subgraphs;
target_subgraphs.reserve(targets.size());
std::transform(targets.begin(),
targets.end(),
std::back_inserter(target_subgraphs),
[&](const auto& t) { return std::make_pair(t, t.find_supported(mod, m)); });
for(const auto ins : iterator_for(*mod))
{ {
auto t = std::max_element( if(contains(p, ins))
targets.begin(), targets.end(), [it, m](const target& lhs, const target& rhs) { {
return lhs.is_supported(it, m) < rhs.is_supported(it, m); continue;
}); }
p.add_assignment(it, t->name());
for(const auto& [target, subgraph] : target_subgraphs)
{
// can't pass a structured binding into lambda in C++17 so create a variable for it
const auto& t = target;
for(const auto& segment : subgraph)
{
const auto& instructions = segment.instructions;
if(not contains(instructions, ins))
{
continue;
}
std::transform(instructions.begin(),
instructions.end(),
std::inserter(p, p.end()),
[&](auto instr) { return std::make_pair(instr, t.name()); });
}
}
} }
return p; return p;
} }
......
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include <migraphx/register_target.hpp> #include <migraphx/register_target.hpp>
#include <migraphx/json.hpp> #include <migraphx/json.hpp>
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
#include <migraphx/op/common.hpp>
#ifdef HAVE_GPU #ifdef HAVE_GPU
#include <migraphx/gpu/hip.hpp> #include <migraphx/gpu/hip.hpp>
...@@ -82,7 +83,7 @@ void visit_py(T x, F f) ...@@ -82,7 +83,7 @@ void visit_py(T x, F f)
{ {
f(x.template cast<bool>()); f(x.template cast<bool>());
} }
else if(py::isinstance<py::int_>(x)) else if(py::isinstance<py::int_>(x) || py::hasattr(x, "__index__"))
{ {
f(x.template cast<int>()); f(x.template cast<int>());
} }
...@@ -324,6 +325,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m) ...@@ -324,6 +325,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
.def("get_parameter_names", &migraphx::program::get_parameter_names) .def("get_parameter_names", &migraphx::program::get_parameter_names)
.def("get_parameter_shapes", &migraphx::program::get_parameter_shapes) .def("get_parameter_shapes", &migraphx::program::get_parameter_shapes)
.def("get_output_shapes", &migraphx::program::get_output_shapes) .def("get_output_shapes", &migraphx::program::get_output_shapes)
.def("is_compiled", &migraphx::program::is_compiled)
.def( .def(
"compile", "compile",
[](migraphx::program& p, const migraphx::target& t, bool offload_copy, bool fast_math) { [](migraphx::program& p, const migraphx::target& t, bool offload_copy, bool fast_math) {
...@@ -358,8 +360,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m) ...@@ -358,8 +360,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
.def("__ne__", std::not_equal_to<migraphx::program>{}) .def("__ne__", std::not_equal_to<migraphx::program>{})
.def("__repr__", [](const migraphx::program& p) { return migraphx::to_string(p); }); .def("__repr__", [](const migraphx::program& p) { return migraphx::to_string(p); });
py::class_<migraphx::operation>(m, "op") py::class_<migraphx::operation> op(m, "op");
.def(py::init([](const std::string& name, py::kwargs kwargs) { op.def(py::init([](const std::string& name, py::kwargs kwargs) {
migraphx::value v = migraphx::value::object{}; migraphx::value v = migraphx::value::object{};
if(kwargs) if(kwargs)
{ {
...@@ -367,9 +369,26 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m) ...@@ -367,9 +369,26 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
} }
return migraphx::make_op(name, v); return migraphx::make_op(name, v);
})) }))
.def("name", &migraphx::operation::name); .def("name", &migraphx::operation::name);
py::enum_<migraphx::op::pooling_mode>(op, "pooling_mode")
.value("average", migraphx::op::pooling_mode::average)
.value("max", migraphx::op::pooling_mode::max)
.value("lpnorm", migraphx::op::pooling_mode::lpnorm);
py::enum_<migraphx::op::rnn_direction>(op, "rnn_direction")
.value("forward", migraphx::op::rnn_direction::forward)
.value("reverse", migraphx::op::rnn_direction::reverse)
.value("bidirectional", migraphx::op::rnn_direction::bidirectional);
m.def(
"argument_from_pointer",
[](const migraphx::shape shape, const int64_t address) {
return migraphx::argument(shape, reinterpret_cast<void*>(address));
},
py::arg("shape"),
py::arg("address"));
m.def( m.def(
"parse_tf", "parse_tf",
[](const std::string& filename, [](const std::string& filename,
......
...@@ -151,8 +151,11 @@ struct find_transpose ...@@ -151,8 +151,11 @@ struct find_transpose
{ {
auto matcher() const auto matcher() const
{ {
return match::name("transpose")(match::none_of( auto output_not_transpose =
match::skip_output(match::name("contiguous"))(match::name("transpose")))); match::none_of(match::skip_output(match::name("contiguous"))(match::name("transpose")));
auto input_has_transpose =
match::args(match::skip(match::name("contiguous"))(match::name("transpose")));
return match::name("transpose")(output_not_transpose, input_has_transpose);
} }
void apply(module& m, const match::matcher_result& mr) const void apply(module& m, const match::matcher_result& mr) const
...@@ -664,9 +667,94 @@ struct find_slice_transpose ...@@ -664,9 +667,94 @@ struct find_slice_transpose
} }
}; };
// Rewrites a `transpose` whose outputs are all `slice` instructions.
//
// When every slice uses the same axes and the same (end - start) distance, and
// that distance evenly divides the axis length and every start/end, apply()
// inserts unsqueeze -> transpose on the transpose's inputs and replaces each
// original slice with a slice along axis 0 followed by a squeeze of axis 0.
// Only a single sliced axis is handled for now.
struct find_transpose_slice
{
// Match a transpose where all of its outputs are slice instructions
auto matcher() const
{
return match::name("transpose")(match::all_of[match::outputs()](match::name("slice")));
}
// Return, element by element, `ends[i] - starts[i]` for the given slice operator.
static std::vector<int64_t> slice_distance(const op::slice& op)
{
assert(op.starts.size() == op.ends.size());
std::vector<int64_t> result(op.starts.size());
std::transform(
op.ends.begin(), op.ends.end(), op.starts.begin(), result.begin(), std::minus<>{});
return result;
}
// Perform the rewrite on module `m` for the transpose in `r.result`.
// Returns without modifying `m` whenever one of the checks below fails.
void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
auto slices = ins->outputs();
if(slices.empty())
return;
// The first slice is the reference the remaining slices are compared against
auto slice = any_cast<op::slice>(slices.front()->get_operator());
auto sdistance = slice_distance(slice);
// Check all distances and axes are the same
if(std::any_of(slices.begin(), slices.end(), [&](auto sins) {
auto s = any_cast<op::slice>(sins->get_operator());
return s.axes != slice.axes or slice_distance(s) != sdistance;
}))
return;
// Check that the axis lengths, starts and ends are all multiples of the slice distance.
// mod_by_distance sums `f(x) % d` over the paired elements of `v` and `sdistance`;
// a zero distance contributes 1 so the sum is non-zero and the rewrite is skipped.
// NOTE(review): the accumulator type is deduced from the literal `0` (int) while the
// per-element lambda returns uint64_t - confirm the narrowing is acceptable.
auto mod_by_distance = [&](const auto& v, auto f) {
return std::inner_product(v.begin(),
v.end(),
sdistance.begin(),
0,
std::plus<>{},
[&](auto x, auto d) -> uint64_t {
if(d == 0)
return 1;
return f(x) % d;
});
};
// For the axes, `x` is used as an index into the transpose's output lens;
// starts and ends go through `id{}` (presumably an identity functor - defined elsewhere)
if(mod_by_distance(slice.axes, [&](auto x) { return ins->get_shape().lens()[x]; }) != 0 or
mod_by_distance(slice.starts, id{}) != 0 or mod_by_distance(slice.ends, id{}) != 0)
return;
// TODO: Handle multiple axes
if(sdistance.size() != 1)
return;
auto axis = slice.axes.front();
// Skip if axis would be packed, i.e. every dimension ahead of `axis` has length 1
if(std::all_of(ins->get_shape().lens().begin(),
ins->get_shape().lens().begin() + axis,
[](auto x) { return x == 1; }))
return;
// Compute axis before transpose to use for unsqueeze
// NOTE(review): std::find yields the position in `perm` whose value equals `axis`;
// confirm this is the intended pre-transpose axis when the permutation is not its
// own inverse.
auto perm = ins->get_operator().to_value()["permutation"].to_vector<int64_t>();
auto preaxis = std::find(perm.begin(), perm.end(), axis) - perm.begin();
// Make unsqueeze (applied to the transpose's inputs, not to the transpose itself)
auto unsqueeze = m.insert_instruction(
ins, make_op("unsqueeze", {{"axes", {preaxis}}, {"steps", sdistance}}), ins->inputs());
// Make transpose: bump every permutation entry greater than preaxis by one to account
// for the added dimension, then put `preaxis + 1` at the front of the permutation
std::transform(perm.begin(), perm.end(), perm.begin(), [&](auto i) {
if(i > preaxis)
return i + 1;
return i;
});
perm.insert(perm.begin(), preaxis + 1);
auto transpose =
m.insert_instruction(ins, make_op("transpose", {{"permutation", perm}}), unsqueeze);
// Slice and squeeze: each original slice becomes a slice on axis 0 of the new transpose,
// with its start/end divided by the slice distance, followed by a squeeze of axis 0
for(auto s : slices)
{
auto op = any_cast<op::slice>(s->get_operator());
op.axes = {0};
op.starts = {op.starts.front() / sdistance.front()};
op.ends = {op.ends.front() / sdistance.front()};
auto slice_ins = m.insert_instruction(ins, op, transpose);
auto squeeze =
m.insert_instruction(ins, make_op("squeeze", {{"axes", {0}}}), slice_ins);
m.replace_instruction(s, squeeze);
}
}
};
void simplify_reshapes::apply(module& m) const void simplify_reshapes::apply(module& m) const
{ {
for(int i = 0; i < 2; i++) for(int i = 0; i < 4; i++)
{ {
match::find_matches(m, match::find_matches(m,
find_where_op{}, find_where_op{},
...@@ -679,6 +767,7 @@ void simplify_reshapes::apply(module& m) const ...@@ -679,6 +767,7 @@ void simplify_reshapes::apply(module& m) const
find_nested_convert{}, find_nested_convert{},
find_nested_slice{}, find_nested_slice{},
find_nested_concat{}, find_nested_concat{},
find_transpose_slice{},
find_slice_transpose{}, find_slice_transpose{},
find_transpose_contiguous_reshaper_unary{}); find_transpose_contiguous_reshaper_unary{});
dead_code_elimination{}.apply(m); dead_code_elimination{}.apply(m);
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <migraphx/compile_options.hpp> #include <migraphx/compile_options.hpp>
#include <migraphx/fpga/context.hpp> #include <migraphx/fpga/context.hpp>
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <migraphx/supported_segments.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -41,7 +42,7 @@ struct target ...@@ -41,7 +42,7 @@ struct target
std::string name() const; std::string name() const;
std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const; std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const;
migraphx::context get_context() const { return context{}; } migraphx::context get_context() const { return context{}; }
float is_supported(instruction_ref ins, support_metric m); supported_segments find_supported(const_module_ref mod, support_metric m) const;
argument copy_to(const argument& arg) const { return arg; } argument copy_to(const argument& arg) const { return arg; }
argument copy_from(const argument& arg) const { return arg; } argument copy_from(const argument& arg) const { return arg; }
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include <migraphx/dead_code_elimination.hpp> #include <migraphx/dead_code_elimination.hpp>
#include <migraphx/generate.hpp> #include <migraphx/generate.hpp>
#include <migraphx/normalize_ops.hpp> #include <migraphx/normalize_ops.hpp>
#include <migraphx/iterator_for.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -62,12 +63,17 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti ...@@ -62,12 +63,17 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
argument target::allocate(const shape& s) const { return fill_argument(s, 0); } argument target::allocate(const shape& s) const { return fill_argument(s, 0); }
float is_supported(instruction_ref ins, support_metric m) supported_segments target::find_supported(const_module_ref mod, support_metric m) const
{ {
// for now, not using the ins and metric to return a value
(void)ins;
(void)m; (void)m;
return 1.0;
supported_segment instrs;
for(const auto ins : iterator_for(*mod))
{
instrs.instructions.insert(ins);
}
instrs.metric = 1; // arbitrary value
return {instrs};
} }
MIGRAPHX_REGISTER_TARGET(target); MIGRAPHX_REGISTER_TARGET(target);
......
...@@ -25,6 +25,13 @@ ...@@ -25,6 +25,13 @@
#include <migraphx/shape.hpp> #include <migraphx/shape.hpp>
#include <migraphx/permutation.hpp> #include <migraphx/permutation.hpp>
#include <migraphx/stringutils.hpp> #include <migraphx/stringutils.hpp>
#include <migraphx/module.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/cpp_generator.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/ranges.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -75,25 +82,25 @@ std::string vectorize::str() const ...@@ -75,25 +82,25 @@ std::string vectorize::str() const
preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs) preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs)
{ {
const std::size_t max_lds_bytes = 4096; const std::size_t max_lds_bytes = 4096;
std::vector<bool> result; std::vector<bool> result(inputs.size());
std::transform(inputs.begin(), std::vector<std::size_t> preloaded;
inputs.end(), auto idxs = range(inputs.size());
std::back_inserter(result), std::copy_if(idxs.begin(), idxs.end(), std::back_inserter(preloaded), [&](auto i) {
[&](const shape& input) { return input.strides()[axis] == 0; }); return inputs[i].strides()[axis] == 0;
auto bytes = std::inner_product(inputs.begin(),
inputs.end(),
result.begin(),
std::size_t{0},
std::plus<>{},
[](const shape& s, bool b) -> std::size_t {
if(b)
return s.bytes();
return 0;
}); });
if(bytes < max_lds_bytes) std::sort(preloaded.begin(), preloaded.end(), by(std::less<>{}, [&](auto i) {
return {result}; return inputs[i].bytes();
// TODO: Try to partially preload items }));
std::fill(result.begin(), result.end(), false);
std::size_t bytes = 0;
for(auto i : preloaded)
{
auto input = inputs[i];
bytes += input.bytes();
if(bytes > max_lds_bytes)
break;
result[i] = true;
}
return {result}; return {result};
} }
...@@ -125,6 +132,45 @@ std::string make_transformer_args(std::vector<std::string> transformers) ...@@ -125,6 +132,45 @@ std::string make_transformer_args(std::vector<std::string> transformers)
return join_strings(std::move(transformers), ", "); return join_strings(std::move(transformers), ", ");
} }
std::string generate_pointwise(const module& pm, const std::string& name)
{
module m = pm;
run_passes(m, {eliminate_common_subexpression{}, dead_code_elimination{}});
cpp_generator g;
g.fmap([](const std::string& fname) { return "migraphx::" + fname; });
g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");
g.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})");
g.add_point_op("sign", "${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))");
g.add_point_op("equal", "migraphx::abs(${0} == ${1})");
g.add_point_op("less", "migraphx::abs(${0} < ${1})");
g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
g.add_point_op("not", "migraphx::abs(not ${0})");
// Add explict conversions
g.fresult(
[](const shape& s) { return "migraphx::convert<" + shape::cpp_type(s.type()) + ">"; });
g.create_function(
g.generate_module(m).set_attributes({"__device__"}).set_generic_types(m).set_name(name));
return g.str();
}
// Gather the instruction names of `m` in iteration order, leaving out every
// instruction whose name begins with "@".
static std::vector<std::string> get_op_names(const module& m)
{
    std::vector<std::string> names;
    for(const auto& instr : m)
    {
        const auto op_name = instr.name();
        if(not starts_with(op_name, "@"))
            names.push_back(op_name);
    }
    return names;
}
std::string generate_name_from_ops(const module& m)
{
auto op_names = get_op_names(m);
return join_strings(op_names, "_");
}
} // namespace gen } // namespace gen
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
...@@ -827,13 +827,14 @@ void apply_conv_bias(context& ctx, module& m, const match::matcher_result& r) ...@@ -827,13 +827,14 @@ void apply_conv_bias(context& ctx, module& m, const match::matcher_result& r)
m.replace_instruction(ins, cb, input_ins, weights_ins, old_ws_ins, bias_ins, alloc_ins); m.replace_instruction(ins, cb, input_ins, weights_ins, old_ws_ins, bias_ins, alloc_ins);
} }
inline auto precompile_name(std::string s) // NOLINT template <class... Strings>
inline auto precompile_name(Strings... names) // NOLINT
{ {
return match::make_basic_pred_matcher([=](instruction_ref ins) { return match::make_basic_pred_matcher([=](instruction_ref ins) {
if(ins->name() != "gpu::precompile_op") if(ins->name() != "gpu::precompile_op")
return false; return false;
auto op = from_value<operation>(ins->get_operator().to_value().at("op")); auto op = from_value<operation>(ins->get_operator().to_value().at("op"));
return (op.name() == s); return (contains({names...}, op.name()));
}); });
} }
...@@ -1041,6 +1042,31 @@ struct find_contiguous_pointwise ...@@ -1041,6 +1042,31 @@ struct find_contiguous_pointwise
} }
}; };
// Folds a precompiled "pointwise" op into the precompiled layernorm
// ("gpu::prelayernorm" or "gpu::preadd_layernorm") that feeds its first argument.
struct find_layernorm_pointwise
{
    auto matcher() const
    {
        auto layernorm_input =
            precompile_name("gpu::prelayernorm", "gpu::preadd_layernorm").bind("layernorm");
        return precompile_name("pointwise")(match::arg(0)(layernorm_input));
    }

    void apply(module& m, const match::matcher_result& r) const
    {
        auto pointwise_ins = r.result;
        auto layernorm_ins = r.instructions["layernorm"];

        // Leave a layernorm that already has module inputs alone
        if(not layernorm_ins->module_inputs().empty())
            return;
        auto* pointwise_mod = pointwise_ins->module_inputs().front();

        // New argument list: the layernorm inputs without the last one (presumably its
        // output allocation), then every pointwise input after the first
        auto fused_inputs = layernorm_ins->inputs();
        fused_inputs.pop_back();
        const auto& pointwise_inputs = pointwise_ins->inputs();
        fused_inputs.insert(
            fused_inputs.end(), pointwise_inputs.begin() + 1, pointwise_inputs.end());

        // The pointwise instruction becomes the layernorm operator carrying the pointwise module
        m.replace_instruction(
            pointwise_ins, layernorm_ins->get_operator(), fused_inputs, {pointwise_mod});
    }
};
void fuse_ops::apply(module& m) const void fuse_ops::apply(module& m) const
{ {
match::find_matches(m, find_contiguous_pointwise{}, find_gelu{}, find_gelu_new{fast_math}); match::find_matches(m, find_contiguous_pointwise{}, find_gelu{}, find_gelu_new{fast_math});
...@@ -1063,6 +1089,7 @@ void fuse_ops::apply(module& m) const ...@@ -1063,6 +1089,7 @@ void fuse_ops::apply(module& m) const
match::find_matches(m, match::find_matches(m,
find_triadd_layernorm{}, find_triadd_layernorm{},
find_gemm_add{}, find_gemm_add{},
find_layernorm_pointwise{},
find_gemm_pointwise{}, find_gemm_pointwise{},
find_commutative_broadcast{}); find_commutative_broadcast{});
match::find_matches(m, find_contiguous{}); match::find_matches(m, find_contiguous{});
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#define MIGRAPHX_GUARD_GPU_COMPILE_GEN_HPP #define MIGRAPHX_GUARD_GPU_COMPILE_GEN_HPP
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <migraphx/module_ref.hpp>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
...@@ -62,6 +63,10 @@ std::string make_transformer_args(Ts... xs) ...@@ -62,6 +63,10 @@ std::string make_transformer_args(Ts... xs)
return make_transformer_args({xs.str()...}); return make_transformer_args({xs.str()...});
} }
// Generate the source of a `__device__` function named `name` from the pointwise module `pm`
std::string generate_pointwise(const module& pm, const std::string& name);
// Build a name by joining with "_" the names of the instructions in `m` that do not start with "@"
std::string generate_name_from_ops(const module& m);
} // namespace gen } // namespace gen
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment