Commit 3272b22e authored by Shucai Xiao

clang format

parent 94e3a2e4
@@ -150,9 +150,9 @@ struct memory_coloring_impl
    // Priority queue for coloring.
    std::priority_queue<interval_ptr, std::vector<interval_ptr>, ordering> alloc_queue{};
    int num_of_lives = 0;
    int max_value_number = -1;
    int required_bytes = 0;
    // The earliest program point where a live interval ends.
    int earliest_end_point = -1;
    // The latest program point where a live interval ends.
...
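Note: alloc_queue above relies on a user-supplied comparator to decide which live interval to color next. A minimal sketch of that pattern, with an assumed ordering (MIGraphX's actual criteria may differ):

#include <queue>
#include <vector>

struct interval
{
    int begin = 0;
    int end   = 0;
};
using interval_ptr = interval*;

// Assumed ordering for illustration: pop the interval that ends earliest.
struct ordering
{
    bool operator()(interval_ptr a, interval_ptr b) const { return a->end > b->end; }
};

std::priority_queue<interval_ptr, std::vector<interval_ptr>, ordering> alloc_queue{};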
@@ -565,8 +565,8 @@ void program::from_value(const value& v)
double common_average(const std::vector<double>& v)
{
    int n = v.size() / 4;
    double total = std::accumulate(v.begin() + n, v.end() - n, 0.0);
    return total / std::distance(v.begin() + n, v.end() - n);
}
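Note: common_average drops the lowest-index and highest-index quarter of the samples and averages the middle half, i.e. a 25% trimmed mean (assuming the caller passes sorted timings). A standalone sketch of the same computation:

#include <iostream>
#include <iterator>
#include <numeric>
#include <vector>

// Same logic as common_average: trim v.size() / 4 samples from each
// end, then average what remains (v is assumed to be sorted).
double trimmed_average(const std::vector<double>& v)
{
    int n = v.size() / 4;
    double total = std::accumulate(v.begin() + n, v.end() - n, 0.0);
    return total / std::distance(v.begin() + n, v.end() - n);
}

int main()
{
    std::vector<double> times = {1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 50.0};
    std::cout << trimmed_average(times) << "\n"; // outliers trimmed, prints 2
}

The trimming makes reported perf numbers robust to warm-up and scheduling outliers at either end of the sorted run times.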
...
@@ -596,10 +596,7 @@ void program::mark(const parameter_map& params, marker&& m)
    m.mark_stop(*this);
}

-void program::perf_report(std::ostream& os,
-                          int n,
-                          parameter_map params,
-                          int batch) const
+void program::perf_report(std::ostream& os, int n, parameter_map params, int batch) const
{
    auto& ctx = this->impl->ctx;
    // Run once by itself
...
@@ -337,8 +337,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
        },
        "Parse onnx file",
        py::arg("filename"),
        py::arg("default_dim_value") = 1,
        py::arg("map_input_dims") = std::unordered_map<std::string, std::vector<int>>(),
        py::arg("skip_unknown_operators") = false,
        py::arg("print_program_on_error") = false,
        py::arg("max_loop_iterations") = 10);
...
@@ -358,8 +358,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
        },
        "Parse onnx file",
        py::arg("filename"),
        py::arg("default_dim_value") = 1,
        py::arg("map_input_dims") = std::unordered_map<std::string, std::vector<int>>(),
        py::arg("skip_unknown_operators") = false,
        py::arg("print_program_on_error") = false);
...
@@ -578,8 +578,8 @@ std::vector<instruction_ref> rewrite_rnn::gru_cell(bool is_forward,
    auto trh = prog.insert_instruction(ins, make_op("transpose", {{"permutation", perm}}), rh);
    // initial states
    auto sih = prog.insert_instruction(ins, make_op("squeeze", {{"axes", {0}}}), ih);
    int bs = ih->get_shape().lens()[1];
    // bias
    instruction_ref bwb{};
...
@@ -50,7 +50,7 @@ struct stream_info
        if(not contains(weights, ins))
        {
            int weight = 0;
            auto&& op = ins->get_operator();
            if(not is_context_free(op) and op.name()[0] != '@')
                weight = model.weight(op);
            // This will ensure a stream will be assigned to return
...
@@ -29,10 +29,10 @@ struct cpu_gather : auto_register_op<cpu_gather>
    // cppcheck-suppress constParameter
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
    {
        int nelements = output_shape.elements();
        auto lens = args[0].get_shape().lens();
        auto axis_dim_size = lens[op.axis];
        lens[op.axis] = args[1].get_shape().elements();
        shape out_comp{output_shape.type(), lens};
        visit_all(args.back(), args[0])([&](auto output, auto input) {
...
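Note: the shape arithmetic in cpu_gather mirrors gather semantics: the output replaces the input's axis dimension with the number of indices. A worked example with assumed sizes:

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // Assumed for illustration: input lens {3, 4, 5}, axis = 1, 6 indices.
    std::vector<std::size_t> lens = {3, 4, 5};
    std::size_t axis = 1;
    std::size_t axis_dim_size = lens[axis]; // 4: the valid range for each index
    lens[axis] = 6;                         // replaced by the index count
    for(auto d : lens)
        std::cout << d << " "; // prints 3 6 5, the computation shape
    std::cout << "(axis dim was " << axis_dim_size << ")\n";
}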
@@ -103,9 +103,7 @@ struct cpu_im2col
                // compute linear index for output
                int ldx = ioutput * col_width + joutput;
                int p = 0;
-               dfor(channels,
-                    kernel_h,
-                    kernel_w)([&](int c, int koffset, int loffset) {
+               dfor(channels, kernel_h, kernel_w)([&](int c, int koffset, int loffset) {
                    auto idx = iinput + long(koffset) - kdiv2_h;
                    auto jdx = jinput + long(loffset) - kdiv2_w;
                    col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
...
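Note: dfor collapses a triple nested loop over channels, kernel_h and kernel_w. A minimal sketch of a dfor-like helper with the assumed semantics (the real utility is variadic and curried):

#include <iostream>

// Assumed semantics: run f(i, j, k) for every i in [0, a), j in [0, b),
// k in [0, c), innermost index varying fastest.
template <class F>
void dfor3(int a, int b, int c, F f)
{
    for(int i = 0; i < a; i++)
        for(int j = 0; j < b; j++)
            for(int k = 0; k < c; k++)
                f(i, j, k);
}

int main()
{
    dfor3(1, 2, 2, [](int c, int koffset, int loffset) {
        std::cout << c << " " << koffset << " " << loffset << "\n";
    });
}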
@@ -12,24 +12,24 @@ namespace device {
void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument& arg)
{
    auto comp_shape = arg.get_shape();
    auto out_lens = comp_shape.lens();
    auto dim_0 = out_lens.size() - 2;
    auto dim_1 = out_lens.size() - 1;
    int lda = comp_shape.strides()[dim_0];
    int m_size = out_lens[dim_0] * out_lens[dim_1];
    visit_all(result, arg)([&](auto output, auto input) {
        int nelements = comp_shape.elements();
        auto* out_ptr = device_cast(output.data());
        auto* in_ptr = device_cast(input.data());
        visit_tensor_size(out_lens.size(), [&](auto out_dim) {
            hip_tensor_descriptor<out_dim> desc(comp_shape);
            gs_launch(stream, nelements, 256)([=](auto ii) __device__ {
                const int nb = 4;
                auto idx = desc.multi(ii);
                int i_m = idx[dim_1];
                int i_k = idx[dim_0];
                int offset = ii / m_size * m_size;
                out_ptr[i_k % nb + (i_m + (i_k / nb) * lda) * nb + offset] =
                    in_ptr[i_m + i_k * lda + offset];
            });
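Note: the indexing in int8_gemm_pack_a interleaves groups of nb = 4 consecutive k-values so they sit contiguously, which is the packed layout the int8 GEMM path consumes (this reading is inferred from the index expression above). A host-side sketch applying the same mapping:

#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    // Assumed sizes for illustration: 8 x 4 (k x m) int8 data, lda = 4.
    const int k = 8, m = 4, lda = 4, nb = 4;
    std::vector<int8_t> in(k * lda), out(k * lda, 0);
    for(int i = 0; i < k * lda; i++)
        in[i] = static_cast<int8_t>(i);

    // Same index mapping as the gs_launch body above (offset = 0, one matrix).
    for(int i_k = 0; i_k < k; i_k++)
        for(int i_m = 0; i_m < m; i_m++)
            out[i_k % nb + (i_m + (i_k / nb) * lda) * nb] = in[i_m + i_k * lda];

    for(int i = 0; i < nb; i++)
        std::cout << int(out[i]) << " "; // prints 0 4 8 12: k = 0..3 of column 0
    std::cout << "\n";
}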
...
@@ -43,7 +43,7 @@ void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument
    auto out_lens = trans_shape.lens();
    auto dim_0 = trans_shape.lens().size() - 2;
    auto dim_1 = trans_shape.lens().size() - 1;
    int ldb = trans_shape.strides()[dim_1];
    auto wrap_lens = out_lens;
    std::swap(wrap_lens[dim_0], wrap_lens[dim_1]);
...
@@ -51,16 +51,16 @@ void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument
    int m_size = out_lens[dim_0] * out_lens[dim_1];
    visit_all(result, arg)([&](auto output, auto input) {
        int nelements = comp_shape.elements();
        auto* out_ptr = device_cast(output.data());
        auto* in_ptr = device_cast(input.data());
        visit_tensor_size(out_lens.size(), [&](auto out_dim) {
            hip_tensor_descriptor<out_dim> desc(comp_shape);
            gs_launch(stream, nelements, 256)([=](auto ii) __device__ {
                const int nb = 4;
                auto idx = desc.multi(ii);
                int i_n = idx[dim_1];
                int i_k = idx[dim_0];
                int offset = ii / m_size * m_size;
                out_ptr[i_k % nb + (i_n + (i_k / nb) * ldb) * nb + offset] =
                    in_ptr[i_n + i_k * ldb + offset];
            });
...
@@ -79,12 +79,8 @@ __device__ auto auto_block_reduce(index idx, Op op, T init, index_int n, F f)
}

template <index_int MaxBlockSize, class Input, class Output>
-__device__ void layernorm(index_int i,
-                          index idx,
-                          int block_size_div,
-                          index_int relements,
-                          Input input,
-                          Output output)
+__device__ void layernorm(
+    index_int i, index idx, int block_size_div, index_int relements, Input input, Output output)
{
    using value_type = decltype(input(idx.local));
    const auto relements_v = relements / vector_size<value_type>{};
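Note: for reference, the reduction machinery above evaluates the standard layer normalization over the relements fastest-varying elements (assuming the usual definition; scale/bias and epsilon handling live elsewhere in the file):

\mu = \frac{1}{n}\sum_{i=1}^{n} x_i, \qquad
\sigma^2 = \frac{1}{n}\sum_{i=1}^{n} (x_i - \mu)^2, \qquad
y_i = \frac{x_i - \mu}{\sqrt{\sigma^2 + \epsilon}}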
...
@@ -128,7 +124,7 @@ void layernorm_vec_impl(hipStream_t stream,
                        const Arguments&... args)
{
    hip_vec_visit_all<N>(result, args...)([&](auto output, auto... inputs) {
        const auto relements_v = relements / N;
        const int max_block_size = 256;
        const int block_size = compute_block_size(relements_v, max_block_size);
        const int block_size_div = encode_divisor(block_size);
...
@@ -597,7 +597,7 @@ struct miopen_fusion
        // Compensate for allocation
        inputs.pop_back();
        int i = 0;
        f = fusion(inputs[i]);
        i++;
        std::vector<std::function<void(const fused_operator_args&, const std::vector<argument>&)>>
            invokers;
...
@@ -89,8 +89,8 @@ void gemm_impl(context& ctx,
        MIGRAPHX_THROW("ROCBLAS_GEMM: k size of int8 type input must be multiple of 4!");
    }

-    auto num_matrices = std::accumulate(
-        out_lens.rbegin() + 2, out_lens.rend(), int{1}, std::multiplies<int>());
+    auto num_matrices =
+        std::accumulate(out_lens.rbegin() + 2, out_lens.rend(), int{1}, std::multiplies<int>());
    if(num_matrices == 1)
    {
        // the rocblas_gemm API handles inputs and output matrices as
...
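Note: the reflowed accumulate computes the batch count as the product of every output dimension except the trailing two matrix dimensions. A standalone check with an assumed shape:

#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main()
{
    // Assumed batched GEMM output lens {2, 3, 4, 5}: the trailing 4 x 5 is
    // the matrix, so the batch holds 2 * 3 = 6 matrices.
    std::vector<int> out_lens = {2, 3, 4, 5};
    auto num_matrices =
        std::accumulate(out_lens.rbegin() + 2, out_lens.rend(), int{1}, std::multiplies<int>());
    std::cout << num_matrices << "\n"; // prints 6
}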
@@ -124,7 +124,7 @@ struct hip_device
    }

    private:
    int id = 0;
    shared<hip_stream_ptr> s = nullptr;
    shared<miopen_handle> mihandle = nullptr;
    shared<rocblas_handle_ptr> rbhandle = nullptr;
...
@@ -187,10 +187,7 @@ struct context
    hip_device::stream& get_stream(int n) { return get_current_device().get_stream(n); }
    const hip_device::stream& get_stream() const { return get_current_device().get_stream(); }
-    const hip_device::stream& get_stream(int n) const
-    {
-        return get_current_device().get_stream(n);
-    }
+    const hip_device::stream& get_stream(int n) const { return get_current_device().get_stream(n); }

    void set_stream(int n) { get_current_device().set_stream(n); }
...
@@ -225,12 +222,12 @@ struct context
    void from_value(const value& v)
    {
        auto v_events = v.at("events");
        int n_events = v_events.without_key().to<int>();
        this->create_events(n_events - 1);
        auto v_streams = v.at("streams");
        int n_streams = v_streams.without_key().to<int>();
        this->current_device = std::make_shared<hip_device>(0, n_streams);
    }
...
@@ -71,10 +71,10 @@ struct argmin_op
template <class Op>
void arg_op(Op op, hipStream_t stream, const argument& result, const argument& arg, int64_t axis)
{
    auto arg_shape = arg.get_shape();
    auto batch_lens = arg_shape.lens();
    int batch_item_num = batch_lens[axis];
    batch_lens[axis] = 1;
    migraphx::shape batch_shape{arg_shape.type(), batch_lens};
    migraphx::shape std_arg_shape{arg_shape.type(), arg_shape.lens()};
...
@@ -82,8 +82,8 @@ void arg_op(Op op, hipStream_t stream, const argument& result, const argument& a
        auto* output = device_cast(result.get<int64_t>().data());
        using type = device_type<std::remove_cv_t<typename decltype(input)::value_type>>;
        // use one block for items in one batch.
        const int max_block_size = 256;
        const int block_size = compute_block_size(batch_item_num, max_block_size);
        gs_launch(stream,
                  batch_shape.elements() * block_size,
                  block_size)([=](auto i, auto idx) __device__ {
...
@@ -29,10 +29,7 @@ struct kernel
                int local,
                const std::vector<kernel_argument>& args) const;

-    void launch(hipStream_t stream,
-                int global,
-                int local,
-                std::vector<void*> args) const;
+    void launch(hipStream_t stream, int global, int local, std::vector<void*> args) const;

    auto launch(hipStream_t stream, int global, int local) const
    {
...
@@ -52,12 +52,8 @@ kernel::kernel(const char* image, const std::string& name) : impl(std::make_shar
        MIGRAPHX_THROW("Failed to get function: " + name + ": " + hip_error(status));
}

-void launch_kernel(hipFunction_t fun,
-                   hipStream_t stream,
-                   int global,
-                   int local,
-                   void* kernargs,
-                   int size)
+void launch_kernel(
+    hipFunction_t fun, hipStream_t stream, int global, int local, void* kernargs, int size)
{
    void* config[] = {
        // HIP_LAUNCH_PARAM_* are macros that do horrible things
...
@@ -78,14 +74,11 @@ void launch_kernel(hipFunction_t fun,
        MIGRAPHX_THROW("Failed to launch kernel: " + hip_error(status));
}

-void kernel::launch(hipStream_t stream,
-                    int global,
-                    int local,
-                    std::vector<void*> args) const
+void kernel::launch(hipStream_t stream, int global, int local, std::vector<void*> args) const
{
    assert(impl != nullptr);
    void* kernargs = args.data();
    int size = args.size() * sizeof(void*);
    launch_kernel(impl->fun, stream, global, local, kernargs, size);
}
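Note: kernel::launch forwards a pre-packed argument buffer through HIP's extra-launch-config mechanism rather than the kernelParams array. A minimal sketch of that launch path (the wrapper name and grid math are assumptions; HIP_LAUNCH_PARAM_* and hipModuleLaunchKernel are the real HIP module API):

#include <cstddef>
#include <hip/hip_runtime.h>

// Hypothetical wrapper: launch fun with an already-packed kernarg blob.
void launch_packed(
    hipFunction_t fun, hipStream_t stream, int global, int local, void* kernargs, std::size_t size)
{
    void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER,
                      kernargs,
                      HIP_LAUNCH_PARAM_BUFFER_SIZE,
                      &size,
                      HIP_LAUNCH_PARAM_END};
    // The grid is given in work-items here, so convert to blocks for the module API.
    hipModuleLaunchKernel(fun, global / local, 1, 1, local, 1, 1, 0, stream, nullptr, config);
}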
...
@@ -97,7 +90,7 @@ void kernel::launch(hipStream_t stream,
{
    assert(impl != nullptr);
    std::vector<char> kernargs = pack_args(args);
    int size = kernargs.size();
    launch_kernel(impl->fun, stream, global, local, kernargs.data(), size);
}
...
@@ -43,10 +43,8 @@ struct avg_pool
};

template <class T, class Op>
-MIGRAPHX_DEVICE_CONSTEXPR T bilinear_interpolate(const T* data,
-                                                 const array<int, 2>& dims,
-                                                 array<float, 2> xy,
-                                                 Op pooling)
+MIGRAPHX_DEVICE_CONSTEXPR T
+bilinear_interpolate(const T* data, const array<int, 2>& dims, array<float, 2> xy, Op pooling)
{
    array<int, 2> low{};
    array<int, 2> high{};
...
@@ -66,9 +64,9 @@ MIGRAPHX_DEVICE_CONSTEXPR T bilinear_interpolate(const T* data,
        }
    }
    array<int, 4> locs = {low[0] * dims[1] + low[1],
                          low[0] * dims[1] + high[1],
                          high[0] * dims[1] + low[1],
                          high[0] * dims[1] + high[1]};
    float ly = xy[0] - low[0];
    float lx = xy[1] - low[1];
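Note: ly and lx are the fractional offsets within the cell; the four corner values loaded via locs are blended with the usual bilinear weights. A scalar sketch of that blend:

#include <iostream>

// Bilinear blend of corner values v00..v11 given fractional offsets
// ly (row direction) and lx (column direction).
float bilinear(float v00, float v01, float v10, float v11, float ly, float lx)
{
    float hy = 1.0f - ly;
    float hx = 1.0f - lx;
    return v00 * hy * hx + v01 * hy * lx + v10 * ly * hx + v11 * ly * lx;
}

int main()
{
    // Sampling the exact center yields the average of the four corners.
    std::cout << bilinear(0.f, 1.f, 2.f, 3.f, 0.5f, 0.5f) << "\n"; // prints 1.5
}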
...
@@ -142,7 +140,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
    // output dims of height and width, in all 2-dim arrays, the first dim
    // is for height and second dim is for width
    const auto& out_lens = out_s.lens;
    array<int, 2> out_dims = {out_lens[2], out_lens[3]};
    for(index_int i = index.global; i < out_s.elements(); i += stride)
...
@@ -225,7 +225,7 @@ struct mlir_apply
    void add_memref_descriptor(std::vector<instruction_ref>& refs, instruction_ref inst)
    {
        const int offset = 0;
        auto inst_t = inst->get_shape();
        refs.push_back(inst);
        refs.push_back(inst);
        refs.push_back(get_literal(offset)); // offset
...
@@ -10,7 +10,7 @@ std::vector<char> pack_args(const std::vector<kernel_argument>& args)
    std::vector<char> kernargs;
    for(auto&& arg : args)
    {
        int n = arg.size;
        const auto* p = static_cast<const char*>(arg.data);
        // Insert padding
        int padding = (arg.align - (kernargs.size() % arg.align)) % arg.align;
...
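Note: the padding expression rounds the running kernarg size up to the next multiple of the argument's alignment. A quick check of the formula:

#include <cstddef>
#include <iostream>

int main()
{
    std::size_t align = 8;
    // padding = (align - (size % align)) % align
    std::cout << (align - (12 % align)) % align << "\n"; // prints 4: 12 + 4 = 16
    std::cout << (align - (16 % align)) % align << "\n"; // prints 0: already aligned
}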
@@ -20,8 +20,14 @@ static auto make_mat(tensor_view<T> x)
    int dim_0 = n_dims - 2;
    int dim_1 = n_dims - 1;
    if(s.transposed())
-        return matrix<T>{x.data(), static_cast<std::size_t>(s.lens()[dim_1]), static_cast<std::size_t>(s.lens()[dim_0]), static_cast<std::size_t>(s.strides()[dim_1])};
-    return matrix<T>{x.data(), static_cast<std::size_t>(s.lens()[dim_0]), static_cast<std::size_t>(s.lens()[dim_1]), static_cast<std::size_t>(s.strides()[dim_0])};
+        return matrix<T>{x.data(),
+                         static_cast<std::size_t>(s.lens()[dim_1]),
+                         static_cast<std::size_t>(s.lens()[dim_0]),
+                         static_cast<std::size_t>(s.strides()[dim_1])};
+    return matrix<T>{x.data(),
+                     static_cast<std::size_t>(s.lens()[dim_0]),
+                     static_cast<std::size_t>(s.lens()[dim_1]),
+                     static_cast<std::size_t>(s.strides()[dim_0])};
}

template <class T, class F>
...
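Note: for a transposed shape, make_mat swaps the two trailing lens and takes the leading dimension from the opposite stride, so the GEMM still sees a plain row-major view. A minimal sketch of the kind of view type assumed here (the real matrix<T> may differ):

#include <cstddef>

// Assumed view: pointer plus rows, cols and leading dimension (the
// stride, in elements, between consecutive rows).
template <class T>
struct matrix
{
    T* data;
    std::size_t rows;
    std::size_t cols;
    std::size_t ld;

    T& operator()(std::size_t i, std::size_t j) const { return data[i * ld + j]; }
};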
@@ -317,9 +317,7 @@ struct ref_im2col
                // compute linear index for output
                int ldx = ioutput * col_width + joutput;
                int p = 0;
-               dfor(channels,
-                    kernel_h,
-                    kernel_w)([&](int c, int koffset, int loffset) {
+               dfor(channels, kernel_h, kernel_w)([&](int c, int koffset, int loffset) {
                    auto idx = iinput + long(koffset) - kdiv2_h;
                    auto jdx = jinput + long(loffset) - kdiv2_w;
                    col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
...
@@ -650,7 +648,7 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
        argument result{output_shape};
        auto batch_lens = output_shape.lens();
        int64_t tuned_axis = tune_axis(args[0].get_shape().lens().size(), op.axis, op.name());
        int n_dims = batch_lens[tuned_axis];
        batch_lens[tuned_axis] = 1;

        shape batch_shape{shape::int32_type, batch_lens};
...
@@ -670,9 +668,9 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
            for(int j = 0; j < n_dims; ++j)
            {
                idx[tuned_axis] = j;
                int index = output_shape.index(idx);
                output[index] = std::exp(input[index] - batch_max[i]);
            }

            for(int j = 0; j < n_dims; ++j)
...
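Note: subtracting batch_max before exponentiating is the standard max-shift for numerical stability; the shift cancels once the exponentials are normalized. A scalar sketch of the same two-pass softmax:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// Numerically stable softmax: exp(x - max) cannot overflow, and the
// shift divides out of the normalized result.
std::vector<double> softmax(std::vector<double> x)
{
    double m   = *std::max_element(x.begin(), x.end());
    double sum = 0.0;
    for(auto& v : x)
    {
        v = std::exp(v - m);
        sum += v;
    }
    for(auto& v : x)
        v /= sum;
    return x;
}

int main()
{
    for(auto v : softmax({1000.0, 1001.0, 1002.0})) // overflows without the shift
        std::cout << v << " ";
    std::cout << "\n";
}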