Unverified commit 66233af7, authored by Zhanwen Chen, committed by GitHub
Browse files

Use math.prod instead of np.prod for trivial ops (#14142)

parent bf13d409
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
"""CacheEngine class for managing the KV cache.""" """CacheEngine class for managing the KV cache."""
from math import prod
from typing import List from typing import List
import numpy as np
import torch import torch
from vllm import envs from vllm import envs
...@@ -90,7 +90,7 @@ class CacheEngine: ...@@ -90,7 +90,7 @@ class CacheEngine:
# NOTE this assumption currently only holds for MLA so we only apply # NOTE this assumption currently only holds for MLA so we only apply
# this optimization when `use_mla` is true # this optimization when `use_mla` is true
entry_shape = kv_cache_shape[2:] entry_shape = kv_cache_shape[2:]
entry_size = np.prod(entry_shape) entry_size = prod(entry_shape)
alloc_entry_size = align_to_256bytes(entry_size, self.dtype) alloc_entry_size = align_to_256bytes(entry_size, self.dtype)
alloc_shape = (*kv_cache_shape[:2], alloc_entry_size) alloc_shape = (*kv_cache_shape[:2], alloc_entry_size)
else: else:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment