"deploy.sh" did not exist on "9268dc9d887a3d54cd1f008dcb628aaa5b5bd90a"
Unverified commit f3435bab authored by OlivierDehaene, committed by GitHub

fix(server): fix deepseekv2 loading (#2266)

parent 53ec0b79
@@ -34,7 +34,6 @@ from text_generation_server.layers.attention.common import Seqlen
 from text_generation_server.layers.layernorm import FastRMSNorm
 from text_generation_server.layers.rotary import PositionRotaryEmbedding, get_mscale
 from text_generation_server.utils.import_utils import SYSTEM
-from text_generation_server.utils.log import log_once
 from text_generation_server.utils.weights import Weights
 from torch import nn
 from transformers.activations import ACT2FN
@@ -240,7 +239,6 @@ class DeepseekV2Attention(torch.nn.Module):
                 if config.attention_bias
                 else None
             ),
-            quantize=config.quantize,
         )
         self.q_a_layernorm = FastRMSNorm.load(
             prefix=f"{prefix}.q_a_layernorm",
@@ -261,7 +259,6 @@ class DeepseekV2Attention(torch.nn.Module):
                 if config.attention_bias
                 else None
             ),
-            quantize=config.quantize,
         )
         self.kv_a_layernorm = FastRMSNorm.load(
......
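
For context, both hunks drop the quantize=config.quantize keyword from the projection load calls in DeepseekV2Attention.__init__ (the first hunk also drops the then-unused log_once import); presumably the underlying linear-layer helper no longer accepts that argument, which is what broke DeepseekV2 loading. Below is a minimal sketch of how one call site might read after the change. The surrounding names (get_linear, weights.get_weights, weights.get_tensor, self.q_a_proj) are assumed from the rest of the TGI server code and do not appear in this diff.

# Sketch only, not verbatim repository code: excerpt-style view of the q_a_proj
# load in DeepseekV2Attention.__init__ after this commit. Quantization is assumed
# to be resolved by the loader attached to `weights`, so no quantize kwarg is passed.
self.q_a_proj = get_linear(
    weight=weights.get_weights(f"{prefix}.q_a_proj"),
    bias=(
        weights.get_tensor(f"{prefix}.q_a_proj.bias")
        if config.attention_bias
        else None
    ),
    # quantize=config.quantize,  # removed by this commit
)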