Commit a7668e46 authored by zhuwenwen
Browse files

[fix]fix tests of v1 and worker

parent 4a62a3eb
......@@ -32,7 +32,7 @@ def test_deepseek_mla_attn_backend_module():
trust_remote_code=True,
enable_chunked_prefill=False,
)
assert model_runner.attn_backend.__name__ == "TritonMLABackend"
assert model_runner.attn_backend.__name__ == "FlashMLABackend" # "TritonMLABackend"
@pytest.mark.parametrize("batch_size", list(range(1, 257, 3)))
......
......@@ -799,7 +799,6 @@ class FusedMoE(torch.nn.Module):
if quant_config is None:
# Not considering quant for now, temporarily
self.use_nn_moe = int(os.environ.get('MOE_NN', 1)) == 1
# self.use_nn_moe = os.environ.get('MOE_NN') == '1'
else:
self.use_nn_moe = False
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment