"vscode:/vscode.git/clone" did not exist on "13e4b7193881be9031e4cf7bc379cde61a94e14f"
Commit becfa5b2 authored by Paul
Browse files

Only vectorize on block algo

parent 915ecaa5
......@@ -119,16 +119,14 @@ struct fused_reduce_compiler : compiler<fused_reduce_compiler>
options.virtual_inputs = virtual_inputs;
auto faxis = find_fast_axis({options.virtual_inputs.front()});
vectorize vec{};
// Vectorize if the axis is a reduction axis
if(options.virtual_inputs.back().lens()[faxis] == 1)
{
vec = vectorize::elements(ctx, faxis, options.virtual_inputs);
}
auto relements = reduced_shape.elements() / vec.size;
auto nelements = options.virtual_inputs.back().elements();
auto algo = v.get("algo", get_reduce_algo(options.virtual_inputs, reduced_shape.lens()));
if(algo == "block")
{
// Vectorize if the axis is a reduction axis
if(options.virtual_inputs.back().lens()[faxis] == 1)
vec = vectorize::elements(ctx, faxis, options.virtual_inputs);
auto relements = reduced_shape.elements() / vec.size;
auto block_size = compute_block_size(relements, 256);
options.set_launch_params(
v, compute_global_for(ctx, nelements * block_size, 256), block_size);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment