Commit bb67a24c authored by 王敏
Browse files

Merge remote-tracking branch 'origin/v0.11.0-dev' into v0.11.0-dev

parents f687d53c 81eaff62
......@@ -410,8 +410,8 @@ class DeepseekV32ForCausalLM(VerifyAndUpdateConfig):
hf_config = vllm_config.model_config.hf_config
# Mirror the check in vllm/model_executor/models/deepseek_v2.py
# is_v32 = hasattr(hf_config, "index_topk")
# assert is_v32
is_v32 = hasattr(hf_config, "index_topk")
assert is_v32
# For DeepSeekV3.2, we use a custom fp8 format as default (i.e.
# "auto")
......
......@@ -954,8 +954,7 @@ class DeepseekV2MLAAttention(nn.Module):
mscale = yarn_get_mscale(scaling_factor, float(mscale_all_dim))
self.scaling = self.scaling * mscale * mscale
# self.is_v32 = hasattr(config, "index_topk")
self.is_v32 = False
self.is_v32 = hasattr(config, "index_topk")
if self.is_v32:
self.indexer = Indexer(vllm_config, config, hidden_size,
......@@ -1176,8 +1175,7 @@ class DeepseekV2Model(nn.Module):
self.config = config
self.vocab_size = config.vocab_size
# self.is_v32 = hasattr(config, "index_topk")
self.is_v32 = False
self.is_v32 = hasattr(config, "index_topk")
if self.is_v32:
topk_tokens = config.index_topk
topk_indices_buffer = torch.empty(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment