Unverified commit 39e63dec, authored by Jee Jee Li, committed by GitHub
Browse files

[LoRA] Cleanup LoRA unused code (#29611)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
parent 4a80ad0a
...@@ -713,7 +713,6 @@ class NemotronHForCausalLM( ...@@ -713,7 +713,6 @@ class NemotronHForCausalLM(
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
@classmethod @classmethod
def get_mamba_state_dtype_from_config( def get_mamba_state_dtype_from_config(
......
...@@ -387,7 +387,6 @@ class DeciLMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, HasNoOps): ...@@ -387,7 +387,6 @@ class DeciLMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, HasNoOps):
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
# Mistral/Llama models can also be loaded with --load-format mistral # Mistral/Llama models can also be loaded with --load-format mistral
# from consolidated.safetensors checkpoints # from consolidated.safetensors checkpoints
......
...@@ -617,7 +617,6 @@ class PhiMoEForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -617,7 +617,6 @@ class PhiMoEForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
......
...@@ -426,7 +426,6 @@ class SolarForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -426,7 +426,6 @@ class SolarForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
......
...@@ -93,7 +93,6 @@ ALL_ATTENTION_FUNCTIONS["vllm"] = vllm_flash_attention_forward ...@@ -93,7 +93,6 @@ ALL_ATTENTION_FUNCTIONS["vllm"] = vllm_flash_attention_forward
class Base(nn.Module, VllmModel, SupportsQuant, SupportsLoRA, SupportsPP): class Base(nn.Module, VllmModel, SupportsQuant, SupportsLoRA, SupportsPP):
embedding_padding_modules = ["lm_head"]
embedding_modules = ["embed_tokens"] # TODO transformers will have a util to get it embedding_modules = ["embed_tokens"] # TODO transformers will have a util to get it
hf_to_vllm_mapper = WeightsMapper( hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={ orig_to_new_prefix={
......
...@@ -43,7 +43,6 @@ class LoRAModelRunnerMixin: ...@@ -43,7 +43,6 @@ class LoRAModelRunnerMixin:
vllm_config, vllm_config,
device, device,
model.embedding_modules, model.embedding_modules,
model.embedding_padding_modules,
) )
return self.lora_manager.create_lora_manager(model) return self.lora_manager.create_lora_manager(model)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment