Unverified Commit f3435bab authored by OlivierDehaene, committed by GitHub

fix(server): fix deepseekv2 loading (#2266)

parent 53ec0b79
@@ -34,7 +34,6 @@ from text_generation_server.layers.attention.common import Seqlen
 from text_generation_server.layers.layernorm import FastRMSNorm
 from text_generation_server.layers.rotary import PositionRotaryEmbedding, get_mscale
 from text_generation_server.utils.import_utils import SYSTEM
-from text_generation_server.utils.log import log_once
 from text_generation_server.utils.weights import Weights
 from torch import nn
 from transformers.activations import ACT2FN
@@ -240,7 +239,6 @@ class DeepseekV2Attention(torch.nn.Module):
                 if config.attention_bias
                 else None
             ),
-            quantize=config.quantize,
         )
         self.q_a_layernorm = FastRMSNorm.load(
             prefix=f"{prefix}.q_a_layernorm",
@@ -261,7 +259,6 @@ class DeepseekV2Attention(torch.nn.Module):
                 if config.attention_bias
                 else None
             ),
-            quantize=config.quantize,
         )
         self.kv_a_layernorm = FastRMSNorm.load(
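For context, a hedged sketch of the call shape this commit restores: with the newer weights-loader API, quantization is configured on the `Weights` object itself, so the per-layer helpers no longer accept a `quantize` keyword, and the stale `quantize=config.quantize` arguments broke DeepSeek-V2 loading. Only that removal is shown by the diff; the helper name `get_linear`, its import path, and the `weights.get_weights` / `weights.get_tensor` accessors below are assumptions drawn from the surrounding codebase, not confirmed here.

```python
# Hedged sketch, not the exact upstream code: how the q_a projection and its
# layernorm are loaded after this commit, inferred from the visible hunks.
# The import path of `get_linear` and the Weights accessors are assumptions.
from text_generation_server.layers.linear import get_linear
from text_generation_server.layers.layernorm import FastRMSNorm


def load_q_a_proj(config, prefix: str, weights):
    # Quantization is resolved by the loader attached to `weights`, so the
    # linear helper takes only the weight and an optional bias; the
    # `quantize=config.quantize` keyword removed by this commit is no longer
    # part of its signature.
    proj = get_linear(
        weight=weights.get_weights(f"{prefix}.q_a_proj"),
        bias=(
            weights.get_tensor(f"{prefix}.q_a_proj.bias")
            if config.attention_bias
            else None
        ),
    )
    norm = FastRMSNorm.load(
        prefix=f"{prefix}.q_a_layernorm",
        weights=weights,
        eps=config.rms_norm_eps,
    )
    return proj, norm
```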