Commit 5f15bdb5 authored by gaoqiong
Browse files

增加blockint8支持优化

parent f3deca99
......@@ -1734,14 +1734,14 @@ def fused_experts_impl(hidden_states: torch.Tensor,
torch.ops._C.silu_and_mul(intermediate_cache2,
intermediate_cache1.view(-1, N))
if use_int8_w8a8:
m1=intermediate_cache2.shape[0]
if m1<=16:
config =stage2_best_config[m1-1]
elif m1<=32:
m=curr_hidden_states.shape[0]
if m<=16:
config =stage2_best_config[m-1]
elif m<=32:
config =stage2_best_config[15]
elif m1<=64:
elif m<=64:
config =stage2_best_config[16]
elif m1<256:
elif m<256:
config ={
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment