Commit e9cdd2b1 (unverified), authored by Cyrus Leung, committed by GitHub
Browse files

[CI/Build] Further decouple HuggingFace implementation from ours during tests (#4166)

parent 65bf2ac1
import contextlib import contextlib
import gc import gc
import os import os
from typing import List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
import pytest import pytest
import torch import torch
from PIL import Image from PIL import Image
from transformers import (AutoModelForCausalLM, AutoProcessor, from transformers import (AutoModelForCausalLM, AutoProcessor, AutoTokenizer,
LlavaForConditionalGeneration) LlavaConfig, LlavaForConditionalGeneration)
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
from vllm.config import TokenizerPoolConfig, VisionLanguageConfig from vllm.config import TokenizerPoolConfig, VisionLanguageConfig
from vllm.distributed import destroy_model_parallel from vllm.distributed import destroy_model_parallel
from vllm.logger import init_logger
from vllm.sequence import MultiModalData from vllm.sequence import MultiModalData
from vllm.transformers_utils.tokenizer import get_tokenizer
logger = init_logger(__name__)
_TEST_DIR = os.path.dirname(__file__) _TEST_DIR = os.path.dirname(__file__)
_TEST_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "example.txt")] _TEST_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "example.txt")]
...@@ -129,9 +131,7 @@ _STR_DTYPE_TO_TORCH_DTYPE = { ...@@ -129,9 +131,7 @@ _STR_DTYPE_TO_TORCH_DTYPE = {
"float": torch.float, "float": torch.float,
} }
_VISION_LANGUAGE_MODELS = { AutoModelForCausalLM.register(LlavaConfig, LlavaForConditionalGeneration)
"llava-hf/llava-1.5-7b-hf": LlavaForConditionalGeneration,
}
_EMBEDDING_MODELS = [ _EMBEDDING_MODELS = [
"intfloat/e5-mistral-7b-instruct", "intfloat/e5-mistral-7b-instruct",
...@@ -143,23 +143,14 @@ class HfRunner: ...@@ -143,23 +143,14 @@ class HfRunner:
def __init__( def __init__(
self, self,
model_name: str, model_name: str,
tokenizer_name: Optional[str] = None,
dtype: str = "half", dtype: str = "half",
) -> None: ) -> None:
assert dtype in _STR_DTYPE_TO_TORCH_DTYPE assert dtype in _STR_DTYPE_TO_TORCH_DTYPE
torch_dtype = _STR_DTYPE_TO_TORCH_DTYPE[dtype] torch_dtype = _STR_DTYPE_TO_TORCH_DTYPE[dtype]
self.model_name = model_name self.model_name = model_name
if model_name in _VISION_LANGUAGE_MODELS:
self.model = _VISION_LANGUAGE_MODELS[model_name].from_pretrained( if model_name in _EMBEDDING_MODELS:
model_name,
torch_dtype=torch_dtype,
trust_remote_code=True,
).cuda()
self.processor = AutoProcessor.from_pretrained(
model_name,
torch_dtype=torch_dtype,
)
elif model_name in _EMBEDDING_MODELS:
# Lazy init required for AMD CI # Lazy init required for AMD CI
from sentence_transformers import SentenceTransformer from sentence_transformers import SentenceTransformer
self.model = SentenceTransformer( self.model = SentenceTransformer(
...@@ -172,10 +163,24 @@ class HfRunner: ...@@ -172,10 +163,24 @@ class HfRunner:
torch_dtype=torch_dtype, torch_dtype=torch_dtype,
trust_remote_code=True, trust_remote_code=True,
).cuda() ).cuda()
self.processor = None
if tokenizer_name is None: self.tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name = model_name model_name,
self.tokenizer = get_tokenizer(tokenizer_name, trust_remote_code=True) torch_dtype=torch_dtype,
trust_remote_code=True,
)
try:
self.processor = AutoProcessor.from_pretrained(
model_name,
torch_dtype=torch_dtype,
trust_remote_code=True,
)
except Exception:
logger.warning(
"Unable to auto-load processor from HuggingFace for "
"model %s. Using tokenizer instead.", model_name)
self.processor = self.tokenizer
def generate( def generate(
self, self,
...@@ -187,19 +192,19 @@ class HfRunner: ...@@ -187,19 +192,19 @@ class HfRunner:
if images: if images:
assert len(prompts) == len(images) assert len(prompts) == len(images)
for i, prompt in enumerate(prompts): for i, prompt in enumerate(prompts):
if self.model_name not in _VISION_LANGUAGE_MODELS: processor_kwargs: Dict[str, Any] = {
input_ids = self.tokenizer(prompt, "text": prompt,
return_tensors="pt").input_ids "return_tensors": "pt",
inputs = {"input_ids": input_ids.cuda()} }
else: if images is not None and images[i] is not None:
image = images[i] if images else None processor_kwargs["images"] = images[i]
inputs = self.processor(text=prompt,
images=image, inputs = self.processor(**processor_kwargs)
return_tensors="pt") inputs = {
inputs = { key: value.cuda() if value is not None else None
key: value.cuda() if value is not None else None for key, value in inputs.items()
for key, value in inputs.items() }
}
output_ids = self.model.generate( output_ids = self.model.generate(
**inputs, **inputs,
use_cache=True, use_cache=True,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment