Unverified Commit 4a4f537e authored by Paul Fultz II's avatar Paul Fultz II Committed by GitHub
Browse files

Merge branch 'develop' into gpu-batch-gemm-bert

parents adc03be6 a27dd28c
...@@ -15,7 +15,8 @@ jobs: ...@@ -15,7 +15,8 @@ jobs:
steps: steps:
- name: Free space - name: Free space
run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
- uses: actions/checkout@v2 - uses: actions/checkout@v2
# In this step, this action saves a list of existing images, # In this step, this action saves a list of existing images,
...@@ -63,7 +64,7 @@ jobs: ...@@ -63,7 +64,7 @@ jobs:
steps: steps:
- name: Free space - name: Free space
run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
- uses: actions/checkout@v2 - uses: actions/checkout@v2
# In this step, this action saves a list of existing images, # In this step, this action saves a list of existing images,
...@@ -108,7 +109,7 @@ jobs: ...@@ -108,7 +109,7 @@ jobs:
steps: steps:
- name: Free space - name: Free space
run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
- uses: actions/checkout@v2 - uses: actions/checkout@v2
# In this step, this action saves a list of existing images, # In this step, this action saves a list of existing images,
...@@ -143,7 +144,7 @@ jobs: ...@@ -143,7 +144,7 @@ jobs:
steps: steps:
- name: Free space - name: Free space
run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v2 uses: actions/setup-python@v2
...@@ -182,7 +183,7 @@ jobs: ...@@ -182,7 +183,7 @@ jobs:
steps: steps:
- name: Free space - name: Free space
run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v2 uses: actions/setup-python@v2
......
...@@ -93,11 +93,14 @@ rocm_enable_clang_tidy( ...@@ -93,11 +93,14 @@ rocm_enable_clang_tidy(
modernize-* modernize-*
performance-* performance-*
readability-* readability-*
-bugprone-signed-char-misuse -bugprone-easily-swappable-parameters
-bugprone-implicit-widening-of-multiplication-result
-bugprone-macro-parentheses -bugprone-macro-parentheses
-bugprone-signed-char-misuse
# Disable the aliased reserved identifiers # Disable the aliased reserved identifiers
-cert-dcl37-c -cert-dcl37-c
-cert-dcl51-cpp -cert-dcl51-cpp
-cert-err33-c
-cert-str34-c -cert-str34-c
# Disable all alpha checks by default # Disable all alpha checks by default
-clang-analyzer-alpha* -clang-analyzer-alpha*
...@@ -127,6 +130,7 @@ rocm_enable_clang_tidy( ...@@ -127,6 +130,7 @@ rocm_enable_clang_tidy(
-cppcoreguidelines-pro-type-union-access -cppcoreguidelines-pro-type-union-access
-cppcoreguidelines-pro-type-vararg -cppcoreguidelines-pro-type-vararg
-cppcoreguidelines-special-member-functions -cppcoreguidelines-special-member-functions
-cppcoreguidelines-virtual-class-destructor
-google-readability-* -google-readability-*
-google-runtime-int -google-runtime-int
-google-runtime-references -google-runtime-references
...@@ -144,8 +148,10 @@ rocm_enable_clang_tidy( ...@@ -144,8 +148,10 @@ rocm_enable_clang_tidy(
-readability-convert-member-functions-to-static -readability-convert-member-functions-to-static
-readability-else-after-return -readability-else-after-return
-readability-function-cognitive-complexity -readability-function-cognitive-complexity
-readability-identifier-length
-readability-named-parameter -readability-named-parameter
-readability-redundant-string-init -readability-redundant-string-init
-readability-suspicious-call-argument
-readability-uppercase-literal-suffix -readability-uppercase-literal-suffix
-*-avoid-c-arrays -*-avoid-c-arrays
-*-explicit-constructor -*-explicit-constructor
...@@ -178,7 +184,7 @@ rocm_enable_cppcheck( ...@@ -178,7 +184,7 @@ rocm_enable_cppcheck(
style style
performance performance
portability portability
SUPPRESS SUPPRESS
ConfigurationNotChecked ConfigurationNotChecked
unmatchedSuppression unmatchedSuppression
unusedFunction unusedFunction
...@@ -216,6 +222,7 @@ rocm_enable_cppcheck( ...@@ -216,6 +222,7 @@ rocm_enable_cppcheck(
CPPCHECK=1 CPPCHECK=1
__device__= __device__=
__host__= __host__=
__global__=
) )
enable_testing() enable_testing()
......
FROM ubuntu:18.04 FROM ubuntu:20.04
ARG PREFIX=/usr/local ARG PREFIX=/usr/local
...@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local ...@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
RUN dpkg --add-architecture i386 RUN dpkg --add-architecture i386
# Add rocm repository # Add rocm repository
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/4.5/ ubuntu main > /etc/apt/sources.list.d/rocm.list' RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.0.2/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
# Install dependencies # Install dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
...@@ -16,16 +16,12 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow- ...@@ -16,16 +16,12 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
cmake \ cmake \
curl \ curl \
doxygen \ doxygen \
g++-5 \
g++-7 \ g++-7 \
gdb \ gdb \
git \ git \
lcov \ lcov \
locales \ locales \
pkg-config \ pkg-config \
python \
python-dev \
python-pip \
python3 \ python3 \
python3-dev \ python3-dev \
python3-pip \ python3-pip \
......
...@@ -94,5 +94,6 @@ function(add_embed_library EMBED_NAME) ...@@ -94,5 +94,6 @@ function(add_embed_library EMBED_NAME)
generate_embed_source(${EMBED_NAME} SRC ${SRC_FILE} HEADER ${HEADER_FILE} OBJECTS ${OUTPUT_FILES} SYMBOLS ${SYMBOLS}) generate_embed_source(${EMBED_NAME} SRC ${SRC_FILE} HEADER ${HEADER_FILE} OBJECTS ${OUTPUT_FILES} SYMBOLS ${SYMBOLS})
add_library(${EMBED_NAME} STATIC ${OUTPUT_FILES} "${SRC_FILE}") add_library(${EMBED_NAME} STATIC ${OUTPUT_FILES} "${SRC_FILE}")
target_include_directories(${EMBED_NAME} PUBLIC "${EMBED_DIR}/include") target_include_directories(${EMBED_NAME} PUBLIC "${EMBED_DIR}/include")
target_compile_options(${EMBED_NAME} PRIVATE -Wno-reserved-identifier)
set_target_properties(${EMBED_NAME} PROPERTIES POSITION_INDEPENDENT_CODE On) set_target_properties(${EMBED_NAME} PROPERTIES POSITION_INDEPENDENT_CODE On)
endfunction() endfunction()
...@@ -96,6 +96,7 @@ else() ...@@ -96,6 +96,7 @@ else()
-Wno-gnu-zero-variadic-macro-arguments -Wno-gnu-zero-variadic-macro-arguments
-Wno-missing-prototypes -Wno-missing-prototypes
-Wno-nested-anon-types -Wno-nested-anon-types
-Wno-option-ignored
-Wno-padded -Wno-padded
-Wno-shorten-64-to-32 -Wno-shorten-64-to-32
-Wno-sign-conversion -Wno-sign-conversion
......
...@@ -146,6 +146,13 @@ module ...@@ -146,6 +146,13 @@ module
:param list[module] mod_args: optional list of module arguments to the operator. :param list[module] mod_args: optional list of module arguments to the operator.
:rtype instruction :rtype instruction
.. py:method:: add_literal(data)
Adds constant or literal data of the provided shape to the module from a Python buffer, such as a numpy array.
:param py::buffer data: Python buffer or numpy array
:rtype: instruction
.. py:method:: add_parameter(name, shape) .. py:method:: add_parameter(name, shape)
Adds a parameter to the module with provided name and shape. Adds a parameter to the module with provided name and shape.
......
...@@ -62,7 +62,7 @@ ...@@ -62,7 +62,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"!wget -nc https://github.com/onnx/models/raw/master/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx" "!wget -nc https://github.com/onnx/models/blob/main/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx"
] ]
}, },
{ {
......
...@@ -23,7 +23,7 @@ unzip uncased_L-12_H-768_A-12.zip ...@@ -23,7 +23,7 @@ unzip uncased_L-12_H-768_A-12.zip
``` ```
5) Get BERT ONNX model (bertsquad-10.onnx): 5) Get BERT ONNX model (bertsquad-10.onnx):
``` ```
wget https://github.com/onnx/models/raw/master/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx wget https://github.com/onnx/models/blob/main/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx
``` ```
6) Run the inference, it will compile and run the model on three questions and small data provided in `inputs.json`: 6) Run the inference, it will compile and run the model on three questions and small data provided in `inputs.json`:
``` ```
......
FROM ubuntu:18.04 FROM ubuntu:20.04
ARG PREFIX=/usr/local ARG PREFIX=/usr/local
...@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local ...@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
RUN dpkg --add-architecture i386 RUN dpkg --add-architecture i386
# Add rocm repository # Add rocm repository
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/4.5/ ubuntu main > /etc/apt/sources.list.d/rocm.list' RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.0.2/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
# Install dependencies # Install dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
...@@ -20,9 +20,6 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow- ...@@ -20,9 +20,6 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
git \ git \
lcov \ lcov \
pkg-config \ pkg-config \
python \
python-dev \
python-pip \
python3 \ python3 \
python3-dev \ python3-dev \
python3-pip \ python3-pip \
......
...@@ -8,9 +8,9 @@ ...@@ -8,9 +8,9 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
void adjust_allocation::apply(module& p) const void adjust_allocation::apply(module& m) const
{ {
for(auto ins : iterator_for(p)) for(auto ins : iterator_for(m))
{ {
// skip instruction with no input // skip instruction with no input
if(ins->inputs().empty()) if(ins->inputs().empty())
...@@ -27,13 +27,13 @@ void adjust_allocation::apply(module& p) const ...@@ -27,13 +27,13 @@ void adjust_allocation::apply(module& p) const
// of the instruction, reallocate and replace the previous one // of the instruction, reallocate and replace the previous one
if(alias_ins->get_shape() == ins->get_shape()) if(alias_ins->get_shape() == ins->get_shape())
continue; continue;
auto alloc_ins = p.insert_instruction(ins, model.allocate(ins->get_shape())); auto alloc_ins = m.insert_instruction(ins, model.allocate(ins->get_shape()));
p.replace_instruction(alias_ins, alloc_ins); m.replace_instruction(alias_ins, alloc_ins);
// If the memory is an output parameter then copy the memory to the parameter // If the memory is an output parameter then copy the memory to the parameter
if(alias_ins->name() == "@param") if(alias_ins->name() == "@param")
{ {
auto copy = p.insert_instruction(std::next(ins), make_op(model.copy()), ins, alias_ins); auto copy = m.insert_instruction(std::next(ins), make_op(model.copy()), ins, alias_ins);
auto tail = range(std::next(copy), p.end()); auto tail = range(std::next(copy), m.end());
for(auto i : iterator_for(tail)) for(auto i : iterator_for(tail))
{ {
if(contains(i->inputs(), ins)) if(contains(i->inputs(), ins))
......
...@@ -14,31 +14,31 @@ bool happens_before(const std::vector<std::size_t>& e1, const std::vector<std::s ...@@ -14,31 +14,31 @@ bool happens_before(const std::vector<std::size_t>& e1, const std::vector<std::s
not std::equal(e1.begin(), e1.end(), e2.begin(), e2.end(), std::greater_equal<>{}); not std::equal(e1.begin(), e1.end(), e2.begin(), e2.end(), std::greater_equal<>{});
} }
std::vector<stream_race> analyze_streams(const module& p, const stream_model& m) std::vector<stream_race> analyze_streams(const module& m, const stream_model& strmm)
{ {
using vector_clock = std::vector<std::size_t>; using vector_clock = std::vector<std::size_t>;
std::vector<stream_race> races; std::vector<stream_race> races;
auto nstream = m.get_nstream(); auto nstream = strmm.get_nstream();
std::vector<vector_clock> vclock(nstream, vector_clock(nstream)); std::vector<vector_clock> vclock(nstream, vector_clock(nstream));
std::unordered_map<instruction_ref, vector_clock> timestamp; std::unordered_map<instruction_ref, vector_clock> timestamp;
std::unordered_map<std::size_t, vector_clock> events; std::unordered_map<std::size_t, vector_clock> events;
for(auto ins : iterator_for(p)) for(auto ins : iterator_for(m))
{ {
if(not m.has_stream(ins)) if(not strmm.has_stream(ins))
continue; continue;
std::size_t s = m.get_stream(ins); std::size_t s = strmm.get_stream(ins);
assert(s < nstream); assert(s < nstream);
assert(vclock.size() == nstream); assert(vclock.size() == nstream);
assert(vclock[s].size() == nstream); assert(vclock[s].size() == nstream);
if(m.is_record(ins)) if(strmm.is_record(ins))
{ {
vclock[s][s]++; vclock[s][s]++;
auto event = m.get_event_id(ins); auto event = strmm.get_event_id(ins);
events[event] = vclock[s]; events[event] = vclock[s];
} }
else if(m.is_wait(ins)) else if(strmm.is_wait(ins))
{ {
auto event = m.get_event_id(ins); auto event = strmm.get_event_id(ins);
if(not contains(events, event)) if(not contains(events, event))
MIGRAPHX_THROW("Event is waited on before being recorded: " + MIGRAPHX_THROW("Event is waited on before being recorded: " +
std::to_string(event)); std::to_string(event));
...@@ -57,21 +57,21 @@ std::vector<stream_race> analyze_streams(const module& p, const stream_model& m) ...@@ -57,21 +57,21 @@ std::vector<stream_race> analyze_streams(const module& p, const stream_model& m)
} }
timestamp[ins] = vclock[s]; timestamp[ins] = vclock[s];
} }
for(auto ins : iterator_for(p)) for(auto ins : iterator_for(m))
{ {
if(not m.has_stream(ins)) if(not strmm.has_stream(ins))
continue; continue;
if(ins->inputs().empty()) if(ins->inputs().empty())
continue; continue;
std::size_t s = m.get_stream(ins); std::size_t s = strmm.get_stream(ins);
// Find inputs from different streams // Find inputs from different streams
std::vector<instruction_ref> inputs; std::vector<instruction_ref> inputs;
fix([&](auto self, auto start) { fix([&](auto self, auto start) {
for(auto input : start->inputs()) for(auto input : start->inputs())
{ {
if(not m.has_stream(input)) if(not strmm.has_stream(input))
self(input); self(input);
else if(m.get_stream(input) != s) else if(strmm.get_stream(input) != s)
inputs.push_back(input); inputs.push_back(input);
} }
})(ins); })(ins);
......
...@@ -1072,6 +1072,22 @@ migraphx_module_add_instruction_with_mod_args(migraphx_instruction_t* out, ...@@ -1072,6 +1072,22 @@ migraphx_module_add_instruction_with_mod_args(migraphx_instruction_t* out,
return api_error_result; return api_error_result;
} }
/// C API entry point: adds a literal to `module`, described by `shape` and read from `buffer`,
/// and stores the resulting instruction handle in `*out`.
///
/// A null `module` or `shape` is reported through MIGRAPHX_THROW with
/// migraphx_status_bad_param. Whatever migraphx::try_ yields for the guarded callable is
/// returned to the caller as the status.
/// NOTE(review): `out` and `buffer` are not null-checked here - confirm callers guarantee them.
extern "C" migraphx_status migraphx_module_add_literal(migraphx_instruction_t* out,
                                                       migraphx_module_t module,
                                                       const_migraphx_shape_t shape,
                                                       const char* buffer)
{
    return migraphx::try_([&] {
        // Validate the handles in parameter order before dereferencing either of them.
        if(module == nullptr)
        {
            MIGRAPHX_THROW(migraphx_status_bad_param, "Bad parameter module: Null pointer");
        }
        if(shape == nullptr)
        {
            MIGRAPHX_THROW(migraphx_status_bad_param, "Bad parameter shape: Null pointer");
        }
        *out = allocate<migraphx_instruction_t>(module->object.add_literal(shape->object, buffer));
    });
}
extern "C" migraphx_status migraphx_module_add_parameter(migraphx_instruction_t* out, extern "C" migraphx_status migraphx_module_add_parameter(migraphx_instruction_t* out,
migraphx_module_t module, migraphx_module_t module,
const char* name, const char* name,
......
...@@ -258,6 +258,11 @@ migraphx_status migraphx_module_add_instruction_with_mod_args(migraphx_instructi ...@@ -258,6 +258,11 @@ migraphx_status migraphx_module_add_instruction_with_mod_args(migraphx_instructi
migraphx_instructions_t args, migraphx_instructions_t args,
migraphx_modules_t module_refs); migraphx_modules_t module_refs);
/* Adds a literal to `module`, described by `shape` and read from `buffer`.
 * The handle of the new instruction is written through `out`; the return value is the
 * status of the call. */
migraphx_status migraphx_module_add_literal(migraphx_instruction_t* out,
migraphx_module_t module,
const_migraphx_shape_t shape,
const char* buffer);
migraphx_status migraphx_module_add_parameter(migraphx_instruction_t* out, migraphx_status migraphx_module_add_parameter(migraphx_instruction_t* out,
migraphx_module_t module, migraphx_module_t module,
const char* name, const char* name,
......
...@@ -314,6 +314,7 @@ struct interface_base : Base ...@@ -314,6 +314,7 @@ struct interface_base : Base
T** y = reinterpret_cast<T**>(out); T** y = reinterpret_cast<T**>(out);
T* x = reinterpret_cast<T*>(input); T* x = reinterpret_cast<T*>(input);
assert(x != nullptr and y != nullptr and *y == nullptr); assert(x != nullptr and y != nullptr and *y == nullptr);
// cppcheck-suppress useSmartPointer
*y = new T(*x); // NOLINT *y = new T(*x); // NOLINT
}); });
}; };
...@@ -339,6 +340,7 @@ struct interface_base : Base ...@@ -339,6 +340,7 @@ struct interface_base : Base
template <class T, class Setter, class F> template <class T, class Setter, class F>
void set_auto_fp(Setter setter, F f) void set_auto_fp(Setter setter, F f)
{ {
// cppcheck-suppress constParameter
return set_fp<T>(setter, [=](T& obj, auto out, auto... xs) { return set_fp<T>(setter, [=](T& obj, auto out, auto... xs) {
auto_invoke(f, out, obj, auto_convert_param(rank<2>{}, xs)...); auto_invoke(f, out, obj, auto_convert_param(rank<2>{}, xs)...);
}); });
...@@ -760,6 +762,15 @@ struct module ...@@ -760,6 +762,15 @@ struct module
return instruction(op_ins, own{}); return instruction(op_ins, own{});
} }
/// Adds a literal with shape `s` to this module, reading its data from `buffer`.
///
/// @tparam T      element type the caller's buffer is expressed in
/// @param s       shape handed to the C API alongside the data
/// @param buffer  start of the literal's data; forwarded to the C API as `const char*`
/// @return        instruction wrapper constructed from the new handle with the `own` tag
template <typename T>
instruction add_literal(const migraphx::shape& s, T* buffer)
{
    // migraphx_module_add_literal takes the data as raw bytes, so reinterpret the typed pointer.
    const char* raw_bytes = reinterpret_cast<const char*>(buffer);
    migraphx_instruction_t handle;
    call(&migraphx_module_add_literal, &handle, mm.get(), s.get_handle_ptr(), raw_bytes);
    return instruction(handle, own{});
}
instruction add_parameter(const std::string& name, shape s) instruction add_parameter(const std::string& name, shape s)
{ {
migraphx_instruction_t param_ins; migraphx_instruction_t param_ins;
......
...@@ -212,6 +212,9 @@ def module(h): ...@@ -212,6 +212,9 @@ def module(h):
module_refs='std::vector<migraphx::module*>'), module_refs='std::vector<migraphx::module*>'),
fname='add_instruction', fname='add_instruction',
returns='migraphx::instruction_ref') returns='migraphx::instruction_ref')
h.method('add_literal',
api.params(shape='const migraphx::shape&', buffer='const char*'),
returns='migraphx::instruction_ref')
h.method('add_parameter', h.method('add_parameter',
api.params(name='const char*', shape='const migraphx::shape&'), api.params(name='const char*', shape='const migraphx::shape&'),
returns='migraphx::instruction_ref') returns='migraphx::instruction_ref')
......
...@@ -8,10 +8,10 @@ ...@@ -8,10 +8,10 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
void auto_contiguous::apply(module& p) const void auto_contiguous::apply(module& m) const
{ {
std::string key = "require_std_shape"; std::string key = "require_std_shape";
for(auto ins : reverse_iterator_for(p)) for(auto ins : reverse_iterator_for(m))
{ {
auto&& attr = ins->get_operator().attributes(); auto&& attr = ins->get_operator().attributes();
if((attr.get(key, false))) if((attr.get(key, false)))
...@@ -23,18 +23,18 @@ void auto_contiguous::apply(module& p) const ...@@ -23,18 +23,18 @@ void auto_contiguous::apply(module& p) const
{ {
return in; return in;
} }
return p.insert_instruction(ins, make_op("contiguous"), in); return m.insert_instruction(ins, make_op("contiguous"), in);
}); });
if(new_args != args) if(new_args != args)
{ {
p.replace_instruction(ins, ins->get_operator(), new_args); m.replace_instruction(ins, ins->get_operator(), new_args);
} }
} }
} }
auto last = std::prev(p.end()); auto last = std::prev(m.end());
for(auto ins : iterator_for(p)) for(auto ins : iterator_for(m))
{ {
// for last instruction that is NOT a return // for last instruction that is NOT a return
if(ins->outputs().empty() and ins != last) if(ins->outputs().empty() and ins != last)
...@@ -42,8 +42,8 @@ void auto_contiguous::apply(module& p) const ...@@ -42,8 +42,8 @@ void auto_contiguous::apply(module& p) const
shape s = ins->get_shape(); shape s = ins->get_shape();
if(not s.standard() and s.elements() != 0) if(not s.standard() and s.elements() != 0)
{ {
auto c = p.insert_instruction(std::next(ins), make_op("contiguous"), ins); auto c = m.insert_instruction(std::next(ins), make_op("contiguous"), ins);
p.replace_instruction(ins, c); m.replace_instruction(ins, c);
} }
} }
} }
......
...@@ -9,26 +9,6 @@ ...@@ -9,26 +9,6 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
/// Signed number of steps between `start` and `last` inside range `r`.
///
/// Two cursors leave `start` together, one walking towards r.end() and one towards
/// r.begin(); a cursor stops moving once it reaches its end of the range. The walk ends as
/// soon as either cursor lands on `last`.
///
/// @param r      range both iterators belong to (needs begin()/end(), bidirectional iterators)
/// @param start  position to measure from
/// @param last   position to measure to
/// @return       steps taken: positive when `last` was reached walking forward, negative when
///               it was reached walking backward, 0 when `start == last`
///
/// Precondition: `last` must be reachable from `start` within `r`. Otherwise both cursors end
/// up pinned at the ends of the range and the loop never terminates.
template <class Range, class Iterator>
std::ptrdiff_t bidistance(const Range& r, Iterator start, Iterator last)
{
    auto start_forward   = start;
    auto start_backwards = start;
    // Signed counter: the result may be negated, which must not go through unsigned
    // wrap-around and a narrowing conversion back to the signed return type.
    std::ptrdiff_t n = 0;
    while(start_forward != last and start_backwards != last)
    {
        ++n;
        if(start_forward != r.end())
            ++start_forward;
        if(start_backwards != r.begin())
            --start_backwards;
    }
    if(start_forward == last)
        return n;
    return -n;
}
void dead_code_elimination::apply(program& p) const { p.remove_unused_modules(); } void dead_code_elimination::apply(program& p) const { p.remove_unused_modules(); }
void dead_code_elimination::apply(module& m) const void dead_code_elimination::apply(module& m) const
...@@ -48,17 +28,21 @@ void dead_code_elimination::apply(module& m) const ...@@ -48,17 +28,21 @@ void dead_code_elimination::apply(module& m) const
if(i->get_shape().elements() == 0 and i->name().front() != '@' and if(i->get_shape().elements() == 0 and i->name().front() != '@' and
i->name() != "undefined" and i->name() != "identity") i->name() != "undefined" and i->name() != "identity")
continue; continue;
assert(bidistance(m, i, last) > 0); assert(std::distance(m.begin(), i) <= std::distance(m.begin(), last));
std::unordered_set<instruction_ref> visited;
fix([&](auto self, auto leaf) { fix([&](auto self, auto leaf) {
if(not m.has_instruction(leaf)) if(not m.has_instruction(leaf))
return; return;
if(leaf->outputs().empty()) if(leaf->outputs().empty())
{ {
// Dont visit inputs twice
if(not visited.insert(leaf).second)
return;
std::unordered_set<instruction_ref> args(leaf->inputs().begin(), std::unordered_set<instruction_ref> args(leaf->inputs().begin(),
leaf->inputs().end()); leaf->inputs().end());
leaf->clear_arguments(); leaf->clear_arguments();
assert(bidistance(m, last, leaf) < 0); assert(std::distance(m.begin(), leaf) < std::distance(m.begin(), last));
assert(leaf != ins); assert(leaf != ins);
if(leaf->name() != "@param") if(leaf->name() != "@param")
m.move_instruction(leaf, m.end()); m.move_instruction(leaf, m.end());
......
...@@ -17,7 +17,7 @@ class marker_roctx ...@@ -17,7 +17,7 @@ class marker_roctx
std::function<int(const char*)> sym_roctx_range_push; std::function<int(const char*)> sym_roctx_range_push;
std::function<int()> sym_roctx_range_pop; std::function<int()> sym_roctx_range_pop;
uint64_t range_id; uint64_t range_id = 0;
public: public:
marker_roctx() marker_roctx()
......
...@@ -13,13 +13,13 @@ ...@@ -13,13 +13,13 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
void eliminate_allocation::apply(module& p) const void eliminate_allocation::apply(module& m) const
{ {
assert(alignment > 0); assert(alignment > 0);
std::size_t n = 0; std::size_t n = 0;
std::vector<std::pair<instruction_ref, std::size_t>> allocs; std::vector<std::pair<instruction_ref, std::size_t>> allocs;
for(auto ins : iterator_for(p)) for(auto ins : iterator_for(m))
{ {
if(ins->name() != allocation_op) if(ins->name() != allocation_op)
continue; continue;
...@@ -30,13 +30,13 @@ void eliminate_allocation::apply(module& p) const ...@@ -30,13 +30,13 @@ void eliminate_allocation::apply(module& p) const
} }
if(n > 0) if(n > 0)
{ {
auto mem = p.add_parameter("memory", shape{shape::int8_type, {n}}); auto mem = m.add_parameter("memory", shape{shape::int8_type, {n}});
for(auto&& pp : allocs) for(auto&& pp : allocs)
{ {
auto ins = pp.first; auto ins = pp.first;
auto s = ins->get_shape(); auto s = ins->get_shape();
auto offset = pp.second; auto offset = pp.second;
p.replace_instruction( m.replace_instruction(
ins, make_op("load", {{"shape", to_value(s)}, {"offset", offset}}), mem); ins, make_op("load", {{"shape", to_value(s)}, {"offset", offset}}), mem);
} }
} }
......
...@@ -11,7 +11,7 @@ namespace migraphx { ...@@ -11,7 +11,7 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
template <class Range> template <class Range>
void cse_range(module& p, Range&& r) void cse_range(module& m, Range&& r)
{ {
std::unordered_multimap<std::string, instruction_ref> instructions; std::unordered_multimap<std::string, instruction_ref> instructions;
std::unordered_set<instruction_ref> processed_ins; std::unordered_set<instruction_ref> processed_ins;
...@@ -30,24 +30,24 @@ void cse_range(module& p, Range&& r) ...@@ -30,24 +30,24 @@ void cse_range(module& p, Range&& r)
continue; continue;
if(*eq != *ins) if(*eq != *ins)
continue; continue;
p.replace_instruction(ins, eq); m.replace_instruction(ins, eq);
processed_ins.emplace(ins); processed_ins.emplace(ins);
std::vector<instruction_ref> outputs; std::vector<instruction_ref> outputs;
std::copy_if(eq->outputs().begin(), std::copy_if(eq->outputs().begin(),
eq->outputs().end(), eq->outputs().end(),
std::back_inserter(outputs), std::back_inserter(outputs),
[&](auto x) { return p.has_instruction(x); }); [&](auto x) { return m.has_instruction(x); });
std::sort(outputs.begin(), outputs.end(), [&](auto x, auto y) { std::sort(outputs.begin(), outputs.end(), [&](auto x, auto y) {
return std::distance(eq, x) < std::distance(eq, y); return std::distance(eq, x) < std::distance(eq, y);
}); });
cse_range(p, outputs); cse_range(m, outputs);
} }
instructions.emplace(ins->name(), ins); instructions.emplace(ins->name(), ins);
} }
} }
void eliminate_common_subexpression::apply(module& p) const { cse_range(p, iterator_for(p)); } void eliminate_common_subexpression::apply(module& m) const { cse_range(m, iterator_for(m)); }
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment