Unverified commit 16cfe464, authored by CL-ModelCloud, committed by GitHub

Fix gguf loading via Transformers (#2596)



* hf: support loading gguf file

* code review

* code review

* code cleanup

* note about `use_fast` compat with gguf

---------
Co-authored-by: Qubitium-ModelCloud <qubitium@modelcloud.ai>
parent 888ac292
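
For context: Hugging Face Transformers can load GGUF checkpoints directly by passing a `gguf_file` argument to the `from_pretrained` entry points for config, model, and tokenizer. A minimal sketch of that underlying API, with a placeholder repo id and filename (not values taken from this PR):

```python
# Sketch of the Transformers GGUF loading path this PR wires up.
# The repo id and filename below are placeholders, not values from this PR.
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

repo_id = "example-org/example-model-GGUF"  # hypothetical GGUF repo
gguf_file = "example-model.Q4_K_M.gguf"     # hypothetical quantized file

# Config, model, and tokenizer all accept the same `gguf_file` kwarg;
# the GGUF weights are dequantized into a regular torch model on load.
config = AutoConfig.from_pretrained(repo_id, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(repo_id, gguf_file=gguf_file)

# Note: no `use_fast` here. The tokenizer is rebuilt from metadata embedded
# in the GGUF file itself, which is why the diff below treats `gguf_file`
# and `use_fast` as mutually exclusive.
tokenizer = AutoTokenizer.from_pretrained(repo_id, gguf_file=gguf_file)
```

The diff threads this same `gguf_file` kwarg through `HFLM.__init__` into `_get_config`, `_create_model`, and `_create_tokenizer`: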
```diff
@@ -90,6 +90,7 @@ class HFLM(TemplateLM):
         delta: Optional[str] = None,
         autogptq: Optional[Union[bool, str]] = False,
         gptqmodel: Optional[bool] = False,
+        gguf_file: Optional[str] = None,
         **kwargs,
     ) -> None:
         super().__init__()
@@ -164,6 +165,7 @@ class HFLM(TemplateLM):
             pretrained,
             revision=revision,
             trust_remote_code=trust_remote_code,
+            gguf_file=gguf_file,
         )
 
         # determine which of 'causal' and 'seq2seq' backends to use for HF models
@@ -178,6 +180,7 @@ class HFLM(TemplateLM):
             revision=revision,
             trust_remote_code=trust_remote_code,
             use_fast_tokenizer=use_fast_tokenizer,
+            gguf_file=gguf_file,
         )
 
         # if we passed `pretrained` as a string, initialize our model now
@@ -196,6 +199,7 @@ class HFLM(TemplateLM):
                 delta=delta,
                 autogptq=autogptq,
                 gptqmodel=gptqmodel,
+                gguf_file=gguf_file,
                 **kwargs,
             )
 
@@ -508,12 +512,14 @@ class HFLM(TemplateLM):
         pretrained: str,
         revision: str = "main",
         trust_remote_code: bool = False,
+        gguf_file: Optional[str] = None,
     ) -> None:
         """Return the model config for HuggingFace models"""
         self._config = transformers.AutoConfig.from_pretrained(
             pretrained,
             revision=revision,
             trust_remote_code=trust_remote_code,
+            gguf_file=gguf_file,
         )
 
     def _create_model(
@@ -535,6 +541,7 @@ class HFLM(TemplateLM):
         delta: Optional[str] = None,
         autogptq: Optional[Union[bool, str]] = False,
         gptqmodel: Optional[bool] = False,
+        gguf_file: Optional[str] = None,
         **kwargs,
     ) -> None:
         """
@@ -579,6 +586,7 @@ class HFLM(TemplateLM):
                 revision=revision,
                 torch_dtype=get_dtype(dtype),
                 trust_remote_code=trust_remote_code,
+                gguf_file=gguf_file,
                 **model_kwargs,
             )
         else:
@@ -676,6 +684,7 @@ class HFLM(TemplateLM):
         revision: Optional[str] = "main",
         trust_remote_code: Optional[bool] = False,
         use_fast_tokenizer: Optional[bool] = True,
+        gguf_file: Optional[str] = None,
     ) -> None:
         """
         Helper method during initialization.
@@ -683,14 +692,21 @@ class HFLM(TemplateLM):
         Create a tokenizer object corresponding to the correct
         tokenizer for value of `pretrained`, or use the pre-initialized tokenizer passed.
         """
+        kwargs = {
+            "revision": revision,
+            "trust_remote_code": trust_remote_code,
+        }
+
+        # gguf format embeds tokenizer and is not compatible with hf tokenizer `use_fast` param
+        if gguf_file is not None:
+            kwargs["gguf_file"] = gguf_file
+        else:
+            kwargs["use_fast"] = use_fast_tokenizer
 
         if tokenizer:
             if isinstance(tokenizer, str):
                 self.tokenizer = transformers.AutoTokenizer.from_pretrained(
-                    tokenizer,
-                    revision=revision,
-                    trust_remote_code=trust_remote_code,
-                    use_fast=use_fast_tokenizer,
+                    tokenizer, **kwargs
                 )
             else:
                 assert isinstance(
@@ -705,10 +721,7 @@ class HFLM(TemplateLM):
             # get the HF hub name via accessor on model
             model_name = self.model.name_or_path
             self.tokenizer = transformers.AutoTokenizer.from_pretrained(
-                model_name,
-                revision=revision,
-                trust_remote_code=trust_remote_code,
-                use_fast=use_fast_tokenizer,
+                model_name, **kwargs
             )
 
         return None
```
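
With the change applied, a GGUF checkpoint can be evaluated by constructing `HFLM` with the new argument. An illustrative call, assuming the usual lm-evaluation-harness import path for `HFLM`; the repo id and filename are hypothetical:

```python
# Illustrative usage sketch; repo id and filename are placeholders.
from lm_eval.models.huggingface import HFLM

lm = HFLM(
    pretrained="example-org/example-model-GGUF",  # hypothetical repo
    gguf_file="example-model.Q4_K_M.gguf",        # hypothetical file
    # `use_fast_tokenizer` is not forwarded when `gguf_file` is set:
    # the tokenizer comes from metadata inside the GGUF file.
)
```

The same pair of arguments should also be expressible on the command line via `--model_args pretrained=...,gguf_file=...`, since `HFLM` keyword arguments are populated from `--model_args`; that mapping is assumed here rather than shown in this diff.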