Commit 0f699cf9 authored by Casper Hansen

Fix the metrics with batch size

parent 883e47d5
@@ -121,10 +121,10 @@ def run_speed(model_path, quant_file, device, n_generate=128, n_context=256, bat...
     # Prints
     memory_used = torch.cuda.max_memory_allocated(device) / (1024 ** 2)
-    context_tokens_per_second = n_context / context_time
-    context_ms_per_token = (context_time*1000) / n_context
-    inference_tokens_per_second = n_generate / generation_time
-    inference_ms_per_token = (generation_time*1000) / n_generate
+    context_tokens_per_second = n_context / context_time * batch_size
+    context_ms_per_token = (context_time*1000) / n_context * batch_size
+    inference_tokens_per_second = n_generate / generation_time * batch_size
+    inference_ms_per_token = (generation_time*1000) / n_generate * batch_size
     print(f"[======] Model summary: {model_path} [======]")
     print(f"[*] Load time: {load_time:.2f} seconds")
...
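For context, these metrics come from a benchmark that times prefill over n_context tokens and decoding over n_generate tokens for a whole batch, so a batch of batch_size sequences processes n_tokens * batch_size tokens in the measured time. Below is a minimal sketch of batch-aware speed metrics, assuming that timing model; the helper name and the timing values are hypothetical, not from the commit, and the ms-per-token figures are derived as the reciprocal of throughput so the two metrics stay mutually consistent:

    # Hypothetical sketch of batch-aware speed metrics (not the commit's code).
    def summarize_speed(n_context, n_generate, batch_size,
                        context_time, generation_time):
        # The batch processes batch_size sequences at once, so the total
        # number of tokens handled in each phase is n_tokens * batch_size.
        context_tokens_per_second = n_context * batch_size / context_time
        inference_tokens_per_second = n_generate * batch_size / generation_time

        # Per-token latency is the reciprocal of throughput, in milliseconds.
        context_ms_per_token = 1000 / context_tokens_per_second
        inference_ms_per_token = 1000 / inference_tokens_per_second

        print(f"[*] Context: {context_tokens_per_second:.2f} tokens/s "
              f"({context_ms_per_token:.2f} ms/token)")
        print(f"[*] Generation: {inference_tokens_per_second:.2f} tokens/s "
              f"({inference_ms_per_token:.2f} ms/token)")

    # Illustrative run: batch of 8, a 256-token context prefilled in 0.5 s,
    # and 128 tokens generated per sequence in 4.0 s.
    summarize_speed(n_context=256, n_generate=128, batch_size=8,
                    context_time=0.5, generation_time=4.0)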