Unverified Commit 12a165d1 authored by KonradSzafer's avatar KonradSzafer Committed by GitHub
Browse files

Add delta weights model loading (#1712)

* added delta weights

* removed debug

* readme update

* better error handling

* autogptq warn

* warn update

* peft and delta error, explicitly deleting _model_delta

* linter fix
parent 7852985b
...@@ -282,6 +282,13 @@ lm_eval --model hf \ ...@@ -282,6 +282,13 @@ lm_eval --model hf \
--device cuda:0 --device cuda:0
``` ```
Models provided as delta weights can be easily loaded using the Hugging Face transformers library. Within --model_args, set the delta argument to specify the delta weights, and use the pretrained argument to designate the base model to which they will be applied:
```bash
lm_eval --model hf \
--model_args pretrained=Ejafa/llama_7B,delta=lmsys/vicuna-7b-delta-v1.1 \
--tasks hellaswag
```
[GPTQ](https://github.com/PanQiWei/AutoGPTQ) quantized models can be loaded by specifying their file names in `,autogptq=NAME` (or `,autogptq=True` for default names) in the `model_args` argument: [GPTQ](https://github.com/PanQiWei/AutoGPTQ) quantized models can be loaded by specifying their file names in `,autogptq=NAME` (or `,autogptq=True` for default names) in the `model_args` argument:
```bash ```bash
......
...@@ -107,8 +107,9 @@ class HFLM(TemplateLM): ...@@ -107,8 +107,9 @@ class HFLM(TemplateLM):
max_memory_per_gpu: Optional[Union[int, str]] = None, max_memory_per_gpu: Optional[Union[int, str]] = None,
max_cpu_memory: Optional[Union[int, str]] = None, max_cpu_memory: Optional[Union[int, str]] = None,
offload_folder: Optional[Union[str, os.PathLike]] = "./offload", offload_folder: Optional[Union[str, os.PathLike]] = "./offload",
# PEFT and quantization options # PEFT, delta weights and quantization options
peft: Optional[str] = None, peft: Optional[str] = None,
delta: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False, autogptq: Optional[Union[bool, str]] = False,
**kwargs, **kwargs,
) -> None: ) -> None:
...@@ -210,6 +211,7 @@ class HFLM(TemplateLM): ...@@ -210,6 +211,7 @@ class HFLM(TemplateLM):
max_cpu_memory=max_cpu_memory, max_cpu_memory=max_cpu_memory,
offload_folder=offload_folder, offload_folder=offload_folder,
peft=peft, peft=peft,
delta=delta,
autogptq=autogptq, autogptq=autogptq,
**kwargs, **kwargs,
) )
...@@ -486,8 +488,9 @@ class HFLM(TemplateLM): ...@@ -486,8 +488,9 @@ class HFLM(TemplateLM):
max_memory_per_gpu: Optional[Union[int, str]] = None, max_memory_per_gpu: Optional[Union[int, str]] = None,
max_cpu_memory: Optional[Union[int, str]] = None, max_cpu_memory: Optional[Union[int, str]] = None,
offload_folder: Optional[str] = "./offload", offload_folder: Optional[str] = "./offload",
# PEFT and quantization options # PEFT, delta weights and quantization options
peft: Optional[str] = None, peft: Optional[str] = None,
delta: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False, autogptq: Optional[Union[bool, str]] = False,
**kwargs, **kwargs,
) -> None: ) -> None:
...@@ -563,6 +566,11 @@ class HFLM(TemplateLM): ...@@ -563,6 +566,11 @@ class HFLM(TemplateLM):
**model_kwargs, **model_kwargs,
) )
if peft and delta:
raise ValueError(
"Cannot use both 'peft' and 'delta' options at the same time."
)
if peft: if peft:
if model_kwargs.get("load_in_4bit", None): if model_kwargs.get("load_in_4bit", None):
if version.parse(PEFT_VERSION) < version.parse("0.4.0"): if version.parse(PEFT_VERSION) < version.parse("0.4.0"):
...@@ -570,6 +578,29 @@ class HFLM(TemplateLM): ...@@ -570,6 +578,29 @@ class HFLM(TemplateLM):
self._model = PeftModel.from_pretrained( self._model = PeftModel.from_pretrained(
self._model, peft, revision=revision self._model, peft, revision=revision
) )
elif delta:
if autogptq:
eval_logger.warning(
"Delta weights might trigger unexpected behavior when used with AutoGPTQ."
)
_model_delta = self.AUTO_MODEL_CLASS.from_pretrained(
delta,
revision=revision,
torch_dtype=get_dtype(dtype),
trust_remote_code=trust_remote_code,
**model_kwargs,
)
for name, param in self._model.state_dict().items():
try:
param.data += _model_delta.state_dict()[name]
except KeyError:
raise KeyError(f"Delta model is missing weights for layer: {name}")
except Exception as e:
raise RuntimeError(
f"Failed to add delta weights to layer {name}. Error: {e}"
)
del _model_delta
return None return None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment