Commit 99c96638 authored by Paul
Browse files

Adjust upper limit

parent 06607821
......@@ -85,7 +85,7 @@ struct layernorm_compiler : compiler<layernorm_compiler>
auto preloads = preload::broadcasts(axis, inputs);
auto relements = inputs[0].lens()[axis] / vec.size;
auto nelements = (inputs.back().elements() / inputs[0].lens()[axis]);
-auto block_size = compute_block_size(relements, 256);
+auto block_size = compute_block_size(relements, 512);
hip_compile_options options;
options.set_launch_params(
v, compute_global_for(ctx, nelements * block_size, 256), block_size);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment