Unverified Commit 39e63dec authored by Jee Jee Li's avatar Jee Jee Li Committed by GitHub
Browse files

[LoRA] Cleanup LoRA unused code (#29611)


Signed-off-by: default avatarJee Jee Li <pandaleefree@gmail.com>
Co-authored-by: default avatarCyrus Leung <tlleungac@connect.ust.hk>
parent 4a80ad0a
......@@ -34,12 +34,10 @@ class WorkerLoRAManager:
vllm_config: VllmConfig,
device: torch.device,
embedding_modules: dict[str, str],
embedding_padding_modules: list[str],
lora_model_cls: type[LoRAModel] = LoRAModel,
):
self._lora_model_cls = lora_model_cls
self.embedding_modules = embedding_modules
self.embedding_padding_modules = embedding_padding_modules
self._cached_dummy_lora: None | Literal[False] | LoRAModel = False
self.max_num_seqs = vllm_config.scheduler_config.max_num_seqs
self.max_num_batched_tokens = (
......@@ -121,9 +119,7 @@ class WorkerLoRAManager:
lora_model_id=lora_request.lora_int_id,
device="cpu",
dtype=self.lora_config.lora_dtype,
target_embedding_padding=self.vocab_size,
embedding_modules=self.embedding_modules,
embedding_padding_modules=self.embedding_padding_modules,
model_vocab_size=self.vocab_size,
tensorizer_config_dict=lora_request.tensorizer_config_dict,
weights_mapper=hf_to_vllm_mapper,
)
......
......@@ -482,7 +482,6 @@ class ApertusForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
def __init__(
self,
......
......@@ -419,7 +419,6 @@ class BambaForCausalLM(
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
@classmethod
def get_mamba_state_dtype_from_config(
......
......@@ -457,7 +457,6 @@ class ExaoneForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"wte": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
......
......@@ -450,7 +450,6 @@ class Exaone4ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
......
......@@ -510,7 +510,6 @@ class FalconH1ForCausalLM(
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
@classmethod
def get_mamba_state_dtype_from_config(
......
......@@ -400,7 +400,6 @@ class GraniteForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
......
......@@ -497,7 +497,6 @@ class GraniteMoeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
......
......@@ -601,7 +601,6 @@ class GraniteMoeHybridForCausalLM(
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
@classmethod
def get_mamba_state_dtype_from_config(
......
......@@ -263,7 +263,6 @@ class GraniteMoeSharedForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
......
......@@ -347,7 +347,6 @@ class SupportsLoRA(Protocol):
# The `embedding_module` and `embedding_padding_modules`
# are empty by default.
embedding_modules: ClassVar[dict[str, str]] = {}
embedding_padding_modules: ClassVar[list[str]] = []
packed_modules_mapping: dict[str, list[str]] = {}
......@@ -359,7 +358,6 @@ class _SupportsLoRAType(Protocol):
packed_modules_mapping: dict[str, list[str]]
embedding_modules: dict[str, str]
embedding_padding_modules: list[str]
@overload
......@@ -379,7 +377,6 @@ def supports_lora(
lora_attrs = (
"packed_modules_mapping",
"embedding_modules",
"embedding_padding_modules",
)
missing_attrs = tuple(attr for attr in lora_attrs if not hasattr(model, attr))
......
......@@ -480,7 +480,6 @@ class JambaForCausalLM(
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
config = vllm_config.model_config.hf_config
......
......@@ -422,7 +422,6 @@ class Lfm2ForCausalLM(
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
@classmethod
def get_mamba_state_dtype_from_config(
......
......@@ -602,7 +602,6 @@ class Lfm2MoeForCausalLM(
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
@classmethod
def get_mamba_state_dtype_from_config(
......
......@@ -528,7 +528,6 @@ class LlamaForCausalLM(
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
# Mistral/Llama models can also be loaded with --load-format mistral
# from consolidated.safetensors checkpoints
......
......@@ -568,7 +568,6 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle3):
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
......
......@@ -305,7 +305,6 @@ class EagleMiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
......
......@@ -1741,5 +1741,4 @@ class MiniCPMV(MiniCPMVBaseModel, SupportsMultiModal, SupportsLoRA):
# so update values before init is called
cls.packed_modules_mapping.update(instance_cls.packed_modules_mapping)
cls.embedding_modules.update(instance_cls.embedding_modules)
cls.embedding_padding_modules += instance_cls.embedding_padding_modules
return instance_cls(vllm_config=vllm_config, prefix=prefix)
......@@ -496,7 +496,6 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP, MixtureOfExperts):
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
......
......@@ -439,7 +439,6 @@ class NemotronForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
}
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment