Unverified commit 0feca02d, authored by Lianmin Zheng and committed by GitHub
Browse files

Improve benchmark scripts (#615)

parent 10143e1a
...@@ -100,11 +100,12 @@ def run_one_batch_size(bs): ...@@ -100,11 +100,12 @@ def run_one_batch_size(bs):
with open("results.jsonl", "a") as fout: with open("results.jsonl", "a") as fout:
res = { res = {
"backend": args.backend,
"input_len": args.input_len, "input_len": args.input_len,
"output_len": args.max_tokens, "output_len": args.max_tokens,
"batch_size": bs, "batch_size": bs,
"latency": latency, "latency": latency,
"output_throughput": output_throughput "output_throughput": output_throughput,
} }
fout.write(json.dumps(res) + "\n") fout.write(json.dumps(res) + "\n")
......
...@@ -52,7 +52,7 @@ class TokenToKVPool: ...@@ -52,7 +52,7 @@ class TokenToKVPool:
# Prefetch buffer # Prefetch buffer
self.prefetch_buffer = torch.empty(0, device="cuda", dtype=torch.int32) self.prefetch_buffer = torch.empty(0, device="cuda", dtype=torch.int32)
self.prefetch_chunk_size = 256 self.prefetch_chunk_size = 512
self.clear() self.clear()
...@@ -67,11 +67,11 @@ class TokenToKVPool: ...@@ -67,11 +67,11 @@ class TokenToKVPool:
if need_size <= buffer_len: if need_size <= buffer_len:
select_index = self.prefetch_buffer[:need_size] select_index = self.prefetch_buffer[:need_size]
self.prefetch_buffer = self.prefetch_buffer[need_size:] self.prefetch_buffer = self.prefetch_buffer[need_size:]
return select_index.to(torch.int32) return select_index
addition_size = need_size - buffer_len addition_size = need_size - buffer_len
alloc_size = max(addition_size, self.prefetch_chunk_size) alloc_size = max(addition_size, self.prefetch_chunk_size)
select_index = torch.nonzero(self.mem_state == 0).squeeze(1)[:alloc_size] select_index = torch.nonzero(self.mem_state == 0).squeeze(1)[:alloc_size].to(torch.int32)
if select_index.shape[0] < addition_size: if select_index.shape[0] < addition_size:
return None return None
...@@ -82,7 +82,7 @@ class TokenToKVPool: ...@@ -82,7 +82,7 @@ class TokenToKVPool:
ret_index = self.prefetch_buffer[:need_size] ret_index = self.prefetch_buffer[:need_size]
self.prefetch_buffer = self.prefetch_buffer[need_size:] self.prefetch_buffer = self.prefetch_buffer[need_size:]
return ret_index.to(torch.int32) return ret_index
def alloc_contiguous(self, need_size): def alloc_contiguous(self, need_size):
# NOTE: This function is deprecated. # NOTE: This function is deprecated.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment