"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "5c79b0d6484d7d4c5fe007c3c7ad04c72d3bc59e"
Unverified Commit f2faac74 authored by Yuqi Zhang, committed by GitHub
Browse files

[Bugfix] Fix cpu usage and cache hit stats reporting on cpu environment (#18674)


Signed-off-by: zzzyq <zhangyuqi94@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
parent 279f8545
...@@ -1650,6 +1650,20 @@ class LLMEngine: ...@@ -1650,6 +1650,20 @@ class LLMEngine:
gpu_prefix_cache_hit_rate = self.scheduler[ gpu_prefix_cache_hit_rate = self.scheduler[
0].get_prefix_cache_hit_rate(Device.GPU) 0].get_prefix_cache_hit_rate(Device.GPU)
# Swap the usage and cache hit stats between gpu and cpu when
# running on cpu, because cpu_worker.py intentionally reports the
# number of cpu blocks as gpu blocks for the sake of cache management.
if self.device_config.device_type == "cpu":
num_total_gpu, num_total_cpu = num_total_cpu, num_total_gpu
gpu_cache_usage_sys, cpu_cache_usage_sys = (
cpu_cache_usage_sys,
gpu_cache_usage_sys,
)
gpu_prefix_cache_hit_rate, cpu_prefix_cache_hit_rate = (
cpu_prefix_cache_hit_rate,
gpu_prefix_cache_hit_rate,
)
# Iteration stats # Iteration stats
num_prompt_tokens_iter = 0 num_prompt_tokens_iter = 0
num_generation_tokens_iter = 0 num_generation_tokens_iter = 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment