Unverified Commit ac736fd8 authored by Nicolas Patry's avatar Nicolas Patry Committed by GitHub
Browse files

feat(server): Add native support for PEFT Lora models (#762)

- Will detect `peft` model by finding `adapter_config.json`.
- This triggers a totally dedicated `download-weights` path
- This path, loads the adapter config, finds the base model_id
- It loads the base_model
- Then peft_model
- Then `merge_and_unload()`
- Then `save_pretrained(.., safe_serialization=True)
- Add back the config + tokenizer.merge_and_unload()`
- Then `save_pretrained(.., safe_serialization=True)
- Add back the config + tokenizer.
- The chosen location is a **local folder with the name of the user
  chosen model id**

PROs:

- Easier than to expect user to merge manually
- Barely any change outside of `download-weights` command.
- This means everything will work in a single load.
- Should enable out of the box SM + HFE

CONs:

- Creates a local merged model in unusual location, potentially
  not saved across docker reloads, or ovewriting some files if the PEFT
  itself was local and containing other files ...
parent 8b0d608f
......@@ -716,6 +716,11 @@ fn download_convert_model(args: &Args, running: Arc<AtomicBool>) -> Result<(), L
download_args.push(revision.to_string())
}
// Trust remote code for automatic peft fusion
if args.trust_remote_code {
download_args.push("--trust-remote-code".to_string());
}
// Copy current process env
let mut envs: Vec<(OsString, OsString)> = env::vars_os().collect();
......
This diff is collapsed.
......@@ -30,6 +30,7 @@ transformers = "4.29.2"
einops = "^0.6.1"
texttable = { version = "^1.6.7", optional = true }
datasets = { version = "^2.14.0", optional = true }
peft = "^0.4.0"
[tool.poetry.extras]
accelerate = ["accelerate"]
......
accelerate==0.19.0 ; python_version >= "3.9" and python_version < "4.0"
aiohttp==3.8.5 ; python_version >= "3.9" and python_version < "4.0"
aiosignal==1.3.1 ; python_version >= "3.9" and python_version < "4.0"
async-timeout==4.0.2 ; python_version >= "3.9" and python_version < "4.0"
attrs==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
backoff==2.2.1 ; python_version >= "3.9" and python_version < "4.0"
bitsandbytes==0.38.1 ; python_version >= "3.9" and python_version < "4.0"
certifi==2023.5.7 ; python_version >= "3.9" and python_version < "4.0"
charset-normalizer==3.1.0 ; python_version >= "3.9" and python_version < "4.0"
click==8.1.3 ; python_version >= "3.9" and python_version < "4.0"
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "win32" or python_version >= "3.9" and python_version < "4.0" and platform_system == "Windows"
datasets==2.14.0 ; python_version >= "3.9" and python_version < "4.0"
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" and (sys_platform == "win32" or platform_system == "Windows")
deprecated==1.2.14 ; python_version >= "3.9" and python_version < "4.0"
dill==0.3.7 ; python_version >= "3.9" and python_version < "4.0"
einops==0.6.1 ; python_version >= "3.9" and python_version < "4.0"
filelock==3.12.2 ; python_version >= "3.9" and python_version < "4.0"
frozenlist==1.4.0 ; python_version >= "3.9" and python_version < "4.0"
fsspec==2023.6.0 ; python_version >= "3.9" and python_version < "4.0"
fsspec[http]==2023.6.0 ; python_version >= "3.9" and python_version < "4.0"
googleapis-common-protos==1.59.1 ; python_version >= "3.9" and python_version < "4.0"
grpc-interceptor==0.15.2 ; python_version >= "3.9" and python_version < "4.0"
grpcio-reflection==1.56.0 ; python_version >= "3.9" and python_version < "4.0"
......@@ -29,10 +20,8 @@ jinja2==3.1.2 ; python_version >= "3.9" and python_version < "4.0"
loguru==0.6.0 ; python_version >= "3.9" and python_version < "4.0"
markupsafe==2.1.3 ; python_version >= "3.9" and python_version < "4.0"
mpmath==1.3.0 ; python_version >= "3.9" and python_version < "4.0"
multidict==6.0.4 ; python_version >= "3.9" and python_version < "4.0"
multiprocess==0.70.15 ; python_version >= "3.9" and python_version < "4.0"
networkx==3.1 ; python_version >= "3.9" and python_version < "4.0"
numpy==1.25.0 ; python_version < "4.0" and python_version >= "3.9"
numpy==1.25.0 ; python_version >= "3.9" and python_version < "4.0"
opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "4.0"
opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "4.0"
opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "4.0"
......@@ -43,30 +32,22 @@ opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "4.0"
opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "4.0"
opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "4.0"
packaging==23.1 ; python_version >= "3.9" and python_version < "4.0"
pandas==2.0.3 ; python_version >= "3.9" and python_version < "4.0"
peft==0.4.0 ; python_version >= "3.9" and python_version < "4.0"
protobuf==4.23.3 ; python_version >= "3.9" and python_version < "4.0"
psutil==5.9.5 ; python_version >= "3.9" and python_version < "4.0"
pyarrow==12.0.1 ; python_version >= "3.9" and python_version < "4.0"
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0"
pytz==2023.3 ; python_version >= "3.9" and python_version < "4.0"
pyyaml==6.0 ; python_version >= "3.9" and python_version < "4.0"
regex==2023.6.3 ; python_version >= "3.9" and python_version < "4.0"
requests==2.31.0 ; python_version >= "3.9" and python_version < "4.0"
safetensors==0.3.1 ; python_version >= "3.9" and python_version < "4.0"
sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "4.0"
setuptools==68.0.0 ; python_version >= "3.9" and python_version < "4.0"
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
sympy==1.12 ; python_version >= "3.9" and python_version < "4.0"
texttable==1.6.7 ; python_version >= "3.9" and python_version < "4.0"
tokenizers==0.13.3 ; python_version >= "3.9" and python_version < "4.0"
torch==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
tqdm==4.65.0 ; python_version >= "3.9" and python_version < "4.0"
transformers==4.29.2 ; python_version >= "3.9" and python_version < "4.0"
typer==0.6.1 ; python_version >= "3.9" and python_version < "4.0"
typing-extensions==4.7.1 ; python_version >= "3.9" and python_version < "4.0"
tzdata==2023.3 ; python_version >= "3.9" and python_version < "4.0"
urllib3==2.0.3 ; python_version >= "3.9" and python_version < "4.0"
win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "win32"
wrapt==1.15.0 ; python_version >= "3.9" and python_version < "4.0"
xxhash==3.2.0 ; python_version >= "3.9" and python_version < "4.0"
yarl==1.9.2 ; python_version >= "3.9" and python_version < "4.0"
......@@ -6,6 +6,7 @@ from pathlib import Path
from loguru import logger
from typing import Optional
from enum import Enum
from huggingface_hub import hf_hub_download
app = typer.Typer()
......@@ -88,6 +89,7 @@ def download_weights(
auto_convert: bool = True,
logger_level: str = "INFO",
json_output: bool = False,
trust_remote_code: bool = False,
):
# Remove default handler
logger.remove()
......@@ -118,6 +120,12 @@ def download_weights(
) is not None
if not is_local_model:
try:
adapter_config_filename = hf_hub_download(model_id, revision=revision, filename="adapter_config.json")
utils.download_and_unload_peft(model_id, revision, trust_remote_code=trust_remote_code)
except (utils.LocalEntryNotFoundError, utils.EntryNotFoundError):
pass
# Try to download weights from the hub
try:
filenames = utils.weight_hub_files(model_id, revision, extension)
......
......@@ -54,7 +54,7 @@ class FlashRWSharded(FlashCausalLM):
device,
dtype,
process_group=self.process_group,
aliases={"transformer.word_embeddings.weight": ["lm_head.weight"]},
aliases={"lm_head.weight": ["transformer.word_embeddings.weight"]},
)
config.quantize = quantize
......
from text_generation_server.utils.convert import convert_file, convert_files
from text_generation_server.utils.dist import initialize_torch_distributed
from text_generation_server.utils.weights import Weights
from text_generation_server.utils.peft import download_and_unload_peft
from text_generation_server.utils.hub import (
weight_files,
weight_hub_files,
......@@ -26,6 +27,7 @@ __all__ = [
"weight_files",
"weight_hub_files",
"download_weights",
"download_and_unload_peft",
"EntryNotFoundError",
"HeterogeneousNextTokenChooser",
"LocalEntryNotFoundError",
......
import os
import json
from loguru import logger
import torch
from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM, AutoPeftModelForSeq2SeqLM
def download_and_unload_peft(model_id, revision, trust_remote_code):
torch_dtype = torch.float16
logger.info("Peft model detected.")
logger.info("Loading the model it might take a while without feedback")
try:
model = AutoPeftModelForCausalLM.from_pretrained(
model_id,
revision=revision,
torch_dtype=torch_dtype,
trust_remote_code=trust_remote_code,
low_cpu_mem_usage=True,
)
except Exception:
model = AutoPeftModelForSeq2SeqLM.from_pretrained(
model_id,
revision=revision,
torch_dtype=torch_dtype,
trust_remote_code=trust_remote_code,
low_cpu_mem_usage=True,
)
logger.info(f"Loaded.")
logger.info(f"Merging the lora weights.")
base_model_id = model.peft_config["default"].base_model_name_or_path
model = model.merge_and_unload()
os.makedirs(model_id, exist_ok=True)
cache_dir = model_id
logger.info(f"Saving the newly created merged model to {cache_dir}")
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
model.save_pretrained(cache_dir, safe_serialization=True)
model.config.save_pretrained(cache_dir)
tokenizer.save_pretrained(cache_dir)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment