Commit ae3524c6 authored by zhuwenwen
Browse files

update benchmark_throughput.py

parent 163b243a
...@@ -287,17 +287,15 @@ def main(args: argparse.Namespace): ...@@ -287,17 +287,15 @@ def main(args: argparse.Namespace):
raise ValueError(f"Unknown backend: {args.backend}") raise ValueError(f"Unknown backend: {args.backend}")
total_num_tokens = sum(prompt_len + output_len total_num_tokens = sum(prompt_len + output_len
for _, prompt_len, output_len in requests) for _, prompt_len, output_len in requests)
print(f"Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
f"{total_num_tokens / elapsed_time:.2f} tokens/s")
# if args.dataset is None: if args.dataset is None:
# total_out_tokens = args.output_len * args.num_prompts total_out_tokens = args.output_len * args.num_prompts
# else: else:
# total_out_tokens = sum(output_len for _, _, output_len in requests) total_out_tokens = sum(output_len for _, _, output_len in requests)
# print(f"Latency: {elapsed_time:.2f} s") print(f"Latency: {elapsed_time:.2f} s")
# print(f"All Throughput: {len(requests) / elapsed_time:.2f} requests/s, " print(f"All Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
# f"{total_num_tokens / elapsed_time:.2f} tokens/s") f"{total_num_tokens / elapsed_time:.2f} tokens/s")
# print(f"Generate Throughput: {total_out_tokens / elapsed_time:.2f} tokens/s") print(f"Generate Throughput: {total_out_tokens / elapsed_time:.2f} tokens/s")
# Output JSON results if specified # Output JSON results if specified
...@@ -492,4 +490,4 @@ if __name__ == "__main__": ...@@ -492,4 +490,4 @@ if __name__ == "__main__":
if args.tokenizer != args.model: if args.tokenizer != args.model:
raise ValueError("Tokenizer must be the same as the model for MII " raise ValueError("Tokenizer must be the same as the model for MII "
"backend.") "backend.")
main(args) main(args)
\ No newline at end of file
...@@ -287,17 +287,15 @@ def main(args: argparse.Namespace): ...@@ -287,17 +287,15 @@ def main(args: argparse.Namespace):
raise ValueError(f"Unknown backend: {args.backend}") raise ValueError(f"Unknown backend: {args.backend}")
total_num_tokens = sum(prompt_len + output_len total_num_tokens = sum(prompt_len + output_len
for _, prompt_len, output_len in requests) for _, prompt_len, output_len in requests)
print(f"Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
f"{total_num_tokens / elapsed_time:.2f} tokens/s")
# if args.dataset is None: if args.dataset is None:
# total_out_tokens = args.output_len * args.num_prompts total_out_tokens = args.output_len * args.num_prompts
# else: else:
# total_out_tokens = sum(output_len for _, _, output_len in requests) total_out_tokens = sum(output_len for _, _, output_len in requests)
# print(f"Latency: {elapsed_time:.2f} s") print(f"Latency: {elapsed_time:.2f} s")
# print(f"All Throughput: {len(requests) / elapsed_time:.2f} requests/s, " print(f"All Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
# f"{total_num_tokens / elapsed_time:.2f} tokens/s") f"{total_num_tokens / elapsed_time:.2f} tokens/s")
# print(f"Generate Throughput: {total_out_tokens / elapsed_time:.2f} tokens/s") print(f"Generate Throughput: {total_out_tokens / elapsed_time:.2f} tokens/s")
# Output JSON results if specified # Output JSON results if specified
......
...@@ -195,7 +195,7 @@ class AWQLinearMethod(LinearMethodBase): ...@@ -195,7 +195,7 @@ class AWQLinearMethod(LinearMethodBase):
else: else:
padding_group=0 padding_group=0
if m<4096: if m<20000:
out = ops.awq_gemm(reshaped_x, out = ops.awq_gemm(reshaped_x,
qweight, qweight,
zeros_and_scales, zeros_and_scales,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment