Commit fdff74d6 authored by Casper Hansen's avatar Casper Hansen

Set the batch_size

parent 6e0bde15
@@ -43,7 +43,7 @@ def run_round(model_path, quant_file, n_generate, input_ids, batch_size):
     print(f" -- Loading model...")
     model = AutoAWQForCausalLM.from_quantized(
         model_path, quant_file, fuse_layers=True,
-        max_new_tokens=n_generate
+        max_new_tokens=n_generate, batch_size=batch_size
     )
     print(f" -- Warming up...")
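
For context, a minimal sketch of how the new batch_size argument might flow into the loader in a benchmark-style script. The checkpoint path, quant file name, and prompt below are hypothetical placeholders, not part of this commit, and the assumption is that batch_size lets the fused layers pre-allocate their cache for batched generation.

from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "casperhansen/vicuna-7b-v1.5-awq"  # hypothetical AWQ checkpoint
quant_file = "awq_model_w4-g128.pt"             # hypothetical quant file name
batch_size = 8
n_generate = 128

tokenizer = AutoTokenizer.from_pretrained(model_path)

# fuse_layers=True enables the fused kernels; batch_size (the argument this
# commit forwards) is assumed to size the fused cache for batched runs
model = AutoAWQForCausalLM.from_quantized(
    model_path, quant_file, fuse_layers=True,
    max_new_tokens=n_generate, batch_size=batch_size
)

# Repeat one prompt across the batch to mimic the benchmark's input shape
prompt_ids = tokenizer("The quick brown fox", return_tensors="pt").input_ids
input_ids = prompt_ids.repeat(batch_size, 1).cuda()
output = model.generate(input_ids, max_new_tokens=n_generate)
print(tokenizer.decode(output[0], skip_special_tokens=True))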