Unverified Commit 6a417b86, authored by ajayvohra2005, committed by GitHub
Browse files

fix neuron performance issue (#13589)

parent d3ea5011
...@@ -76,7 +76,7 @@ class NeuronWorker(LoraNotSupportedWorkerBase, LocalOrDistributedWorkerBase): ...@@ -76,7 +76,7 @@ class NeuronWorker(LoraNotSupportedWorkerBase, LocalOrDistributedWorkerBase):
# Set the number of GPU blocks to be the same as the maximum number of # Set the number of GPU blocks to be the same as the maximum number of
# sequences that can be processed in a single batch. This is equivalent # sequences that can be processed in a single batch. This is equivalent
# to schedule without PagedAttention. # to schedule without PagedAttention.
num_gpu_blocks = self.scheduler_config.max_num_seqs num_gpu_blocks = self.scheduler_config.max_num_seqs + 1
# Swap not yet supported with Neuron backend. # Swap not yet supported with Neuron backend.
num_cpu_blocks = 0 num_cpu_blocks = 0
...@@ -90,7 +90,7 @@ class NeuronWorker(LoraNotSupportedWorkerBase, LocalOrDistributedWorkerBase): ...@@ -90,7 +90,7 @@ class NeuronWorker(LoraNotSupportedWorkerBase, LocalOrDistributedWorkerBase):
# Different values are not tested. # Different values are not tested.
assert num_cpu_blocks == 0 assert num_cpu_blocks == 0
assert num_gpu_blocks == self.scheduler_config.max_num_seqs assert num_gpu_blocks == self.scheduler_config.max_num_seqs + 1
self.cache_config.num_gpu_blocks = num_gpu_blocks self.cache_config.num_gpu_blocks = num_gpu_blocks
self.cache_config.num_cpu_blocks = num_cpu_blocks self.cache_config.num_cpu_blocks = num_cpu_blocks
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment