Unverified commit cfa134d2, authored by Tyler Michael Smith and committed by GitHub
Browse files

[Bugfix/CI] Fixup benchmark_moe.py (#12562)



Fixes `is_marlin` not being passed into `get_default_config`

Also allow `--tensor-parallel-size` in addition to `-tp` and `--tp-size`

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
parent 35b7a055
......@@ -343,9 +343,13 @@ class BenchmarkWorker:
op_config = get_moe_configs(num_experts, shard_intermediate_size // 2,
dtype_str)
if op_config is None:
config = get_default_config(num_tokens, num_experts,
shard_intermediate_size, hidden_size,
topk, dtype_str)
config = get_default_config(num_tokens,
num_experts,
shard_intermediate_size,
hidden_size,
topk,
dtype_str,
is_marlin=False)
else:
config = op_config[min(op_config.keys(),
key=lambda x: abs(x - num_tokens))]
......@@ -536,7 +540,11 @@ if __name__ == "__main__":
parser.add_argument("--model",
type=str,
default="mistralai/Mixtral-8x7B-Instruct-v0.1")
parser.add_argument("--tp-size", "-tp", type=int, default=2)
parser.add_argument("--tp-size",
"-tp",
"--tensor-parallel-size",
type=int,
default=2)
parser.add_argument("--dtype",
type=str,
choices=["auto", "fp8_w8a8", "int8_w8a16"],
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment