"vscode:/vscode.git/clone" did not exist on "2efce05dc3c7c1e367617465f8f661a058499e37"
Commit 3c9817d2 authored by zhuwenwen's avatar zhuwenwen
Browse files

add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from local path...

Add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from a local path instead of the Hugging Face Hub
parent 49204f68
import json
import os
import subprocess
import sys
import tempfile

from vllm.entrypoints.openai.protocol import BatchRequestOutput

from ...utils import models_path_prefix
# ruff: noqa: E501
# Model identifiers, resolved against the optional local model directory.
# They have to be computed in Python and serialized with ``json.dumps``:
# spelling ``os.path.join(...)`` inside a string literal leaves the call
# unevaluated and turns every request line into invalid JSON.
_CHAT_MODEL = os.path.join(models_path_prefix,
                           "NousResearch/Meta-Llama-3-8B-Instruct")
_EMBEDDING_MODEL = os.path.join(models_path_prefix,
                                "intfloat/e5-mistral-7b-instruct")


def _chat_body(model, system_prompt, **leading_fields):
    """Build the body of a ``/v1/chat/completions`` batch request.

    ``leading_fields`` are emitted before ``model`` (used for ``stream``).
    """
    return {
        **leading_fields,
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": "Hello world!"},
        ],
        "max_tokens": 1000,
    }


def _embedding_body(model, text):
    """Build the body of a ``/v1/embeddings`` batch request."""
    return {"model": model, "input": text}


def _batch_line(request_id, url, body, id_field="custom_id"):
    """Serialize one batch request as a single JSON line."""
    return json.dumps({
        id_field: request_id,
        "method": "POST",
        "url": url,
        "body": body,
    })


# Two valid chat requests followed by three that must be rejected:
# an unknown model, an unsupported URL, and a streaming request.
INPUT_BATCH = "\n".join([
    _batch_line("request-1", "/v1/chat/completions",
                _chat_body(_CHAT_MODEL, "You are a helpful assistant.")),
    _batch_line("request-2", "/v1/chat/completions",
                _chat_body(_CHAT_MODEL, "You are an unhelpful assistant.")),
    _batch_line("request-3", "/v1/chat/completions",
                _chat_body("NonExistModel",
                           "You are an unhelpful assistant.")),
    _batch_line("request-4", "/bad_url",
                _chat_body(_CHAT_MODEL, "You are an unhelpful assistant.")),
    _batch_line("request-5", "/v1/chat/completions",
                _chat_body(_CHAT_MODEL, "You are an unhelpful assistant.",
                           stream="True")),
])

# The first line uses "invalid_field" in place of "custom_id".
INVALID_INPUT_BATCH = "\n".join([
    _batch_line("request-1", "/v1/chat/completions",
                _chat_body(_CHAT_MODEL, "You are a helpful assistant."),
                id_field="invalid_field"),
    _batch_line("request-2", "/v1/chat/completions",
                _chat_body(_CHAT_MODEL, "You are an unhelpful assistant.")),
])

# Three valid embedding requests and one naming an unknown model.
INPUT_EMBEDDING_BATCH = "\n".join([
    _batch_line("request-1", "/v1/embeddings",
                _embedding_body(_EMBEDDING_MODEL,
                                "You are a helpful assistant.")),
    _batch_line("request-2", "/v1/embeddings",
                _embedding_body(_EMBEDDING_MODEL,
                                "You are an unhelpful assistant.")),
    _batch_line("request-3", "/v1/embeddings",
                _embedding_body(_EMBEDDING_MODEL, "Hello world!")),
    _batch_line("request-4", "/v1/embeddings",
                _embedding_body("NonExistModel", "Hello world!")),
])
......@@ -31,7 +33,7 @@ def test_empty_file():
proc = subprocess.Popen([
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i",
input_file.name, "-o", output_file.name, "--model",
"intfloat/e5-mistral-7b-instruct"
os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct")
], )
proc.communicate()
proc.wait()
......@@ -50,7 +52,7 @@ def test_completions():
proc = subprocess.Popen([
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i",
input_file.name, "-o", output_file.name, "--model",
"NousResearch/Meta-Llama-3-8B-Instruct"
os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct")
], )
proc.communicate()
proc.wait()
......@@ -75,7 +77,7 @@ def test_completions_invalid_input():
proc = subprocess.Popen([
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i",
input_file.name, "-o", output_file.name, "--model",
"NousResearch/Meta-Llama-3-8B-Instruct"
os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct")
], )
proc.communicate()
proc.wait()
......@@ -91,7 +93,7 @@ def test_embeddings():
proc = subprocess.Popen([
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i",
input_file.name, "-o", output_file.name, "--model",
"intfloat/e5-mistral-7b-instruct"
os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct")
], )
proc.communicate()
proc.wait()
......
......@@ -9,8 +9,9 @@ from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_engine import BaseModelPath
from vllm.transformers_utils.tokenizer import get_tokenizer
from ...utils import models_path_prefix
MODEL_NAME = "openai-community/gpt2"
MODEL_NAME = os.path.join(models_path_prefix, "openai-community/gpt2")
CHAT_TEMPLATE = "Dummy chat template for testing {}"
BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
......
......@@ -2,6 +2,7 @@ from http import HTTPStatus
from unittest.mock import MagicMock
import pytest
import os
from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient
......@@ -9,8 +10,9 @@ from vllm.entrypoints.openai.protocol import (ErrorResponse,
LoadLoraAdapterRequest,
UnloadLoraAdapterRequest)
from vllm.entrypoints.openai.serving_engine import BaseModelPath, OpenAIServing
from ...utils import models_path_prefix
MODEL_NAME = "meta-llama/Llama-2-7b"
MODEL_NAME = os.path.join(models_path_prefix, "meta-llama/Llama-2-7b")
BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
LORA_LOADING_SUCCESS_MESSAGE = (
"Success: LoRA adapter '{lora_name}' added successfully.")
......
......@@ -4,9 +4,9 @@ import os
import openai
import pytest
from ...utils import RemoteOpenAIServer
from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
@pytest.mark.asyncio
......
import openai # use the official client for correctness check
import pytest
import os
import pytest_asyncio
import requests
......@@ -8,9 +9,10 @@ from vllm.transformers_utils.tokenizer import get_tokenizer
from ...utils import RemoteOpenAIServer
from .test_completion import zephyr_lora_added_tokens_files # noqa: F401
from .test_completion import zephyr_lora_files # noqa: F401
from ...utils import models_path_prefix
# any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
@pytest.fixture(scope="module")
......
......@@ -2,13 +2,14 @@ from typing import Dict, List
import openai
import pytest
import os
import pytest_asyncio
from vllm.multimodal.utils import encode_image_base64, fetch_image
from ...utils import RemoteOpenAIServer
from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = "microsoft/Phi-3.5-vision-instruct"
MODEL_NAME = os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct")
MAXIMUM_IMAGES = 2
# Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
......
......@@ -2,6 +2,7 @@ import warnings
from typing import Optional
import pytest
import os
from PIL import Image
from vllm.assets.image import ImageAsset
......@@ -11,8 +12,9 @@ from vllm.entrypoints.chat_utils import (parse_chat_messages,
from vllm.multimodal import MultiModalDataDict
from vllm.multimodal.utils import encode_image_base64
from vllm.transformers_utils.tokenizer_group import TokenizerGroup
from ..utils import models_path_prefix
PHI3V_MODEL_ID = "microsoft/Phi-3.5-vision-instruct"
PHI3V_MODEL_ID = os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct")
@pytest.fixture(scope="module")
......
......@@ -2,14 +2,17 @@ from pathlib import Path
from typing import List
import pytest
import os
import torch
from gguf import GGMLQuantizationType, GGUFReader, ReaderTensor, dequantize
from huggingface_hub import snapshot_download
import vllm._custom_ops as ops
from vllm.utils import seed_everything
from ..utils import models_path_prefix
# Prefer the sample stored under ``models_path_prefix``; fall back to the
# Hugging Face Hub when that directory is absent, so the module still imports
# on machines without a local model mirror.
_GGUF_SAMPLE_REPO = "Isotr0py/test-gguf-sample"
_GGUF_SAMPLE_LOCAL = os.path.join(models_path_prefix, _GGUF_SAMPLE_REPO)
GGUF_SAMPLE = (_GGUF_SAMPLE_LOCAL if os.path.isdir(_GGUF_SAMPLE_LOCAL) else
               snapshot_download(_GGUF_SAMPLE_REPO))
def get_gguf_sample_tensors(
......
......@@ -6,6 +6,7 @@ from typing import Dict, List, TypedDict
from unittest.mock import MagicMock, patch
import pytest
import os
import ray
import torch
import torch.nn as nn
......@@ -24,6 +25,7 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.model_executor.layers.sampler import Sampler
from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
from vllm.model_executor.model_loader import get_model
from utils import models_path_prefix
class ContextIDInfo(TypedDict):
......@@ -158,7 +160,7 @@ def dummy_model_gate_up() -> nn.Module:
@pytest.fixture(scope="session")
def sql_lora_huggingface_id():
    """Return the identifier of the SQL LoRA test adapter.

    The directory under ``models_path_prefix`` is returned when it exists;
    otherwise the plain Hugging Face repo id is returned.
    """
    # huggingface repo id is used to test lora runtime downloading.
    repo_id = "yard1/llama-2-7b-sql-lora-test"
    local_dir = os.path.join(models_path_prefix, repo_id)
    return local_dir if os.path.isdir(local_dir) else repo_id
@pytest.fixture(scope="session")
......@@ -170,53 +172,63 @@ def sql_lora_files(sql_lora_huggingface_id):
def mixtral_lora_files():
    """Return the directory holding the ``SangBinCho/mixtral-lora`` adapter.

    The copy under ``models_path_prefix`` is used when that directory exists;
    otherwise the adapter is downloaded from the Hugging Face Hub.
    """
    # Note: this module has incorrect adapter_config.json to test
    # https://github.com/vllm-project/vllm/pull/5909/files.
    repo_id = "SangBinCho/mixtral-lora"
    local_dir = os.path.join(models_path_prefix, repo_id)
    if os.path.isdir(local_dir):
        return local_dir
    return snapshot_download(repo_id=repo_id)
@pytest.fixture(scope="session")
def gemma_lora_files():
    """Return the directory holding the ``wskwon/gemma-7b-test-lora`` adapter.

    The copy under ``models_path_prefix`` is used when that directory exists;
    otherwise the adapter is downloaded from the Hugging Face Hub.
    """
    repo_id = "wskwon/gemma-7b-test-lora"
    local_dir = os.path.join(models_path_prefix, repo_id)
    if os.path.isdir(local_dir):
        return local_dir
    return snapshot_download(repo_id=repo_id)
@pytest.fixture(scope="session")
def chatglm3_lora_files():
    """Return the directory of the ``jeeejeee/chatglm3-text2sql-spider`` adapter.

    The copy under ``models_path_prefix`` is used when that directory exists;
    otherwise the adapter is downloaded from the Hugging Face Hub.
    """
    repo_id = "jeeejeee/chatglm3-text2sql-spider"
    local_dir = os.path.join(models_path_prefix, repo_id)
    if os.path.isdir(local_dir):
        return local_dir
    return snapshot_download(repo_id=repo_id)
@pytest.fixture(scope="session")
def baichuan_lora_files():
    """Return the directory of the ``jeeejeee/baichuan7b-text2sql-spider`` adapter.

    The copy under ``models_path_prefix`` is used when that directory exists;
    otherwise the adapter is downloaded from the Hugging Face Hub.
    """
    repo_id = "jeeejeee/baichuan7b-text2sql-spider"
    local_dir = os.path.join(models_path_prefix, repo_id)
    if os.path.isdir(local_dir):
        return local_dir
    return snapshot_download(repo_id=repo_id)
@pytest.fixture(scope="session")
def baichuan_zero_lora_files():
    """Return the directory of the ``jeeejeee/baichuan7b-zero-init`` adapter.

    The copy under ``models_path_prefix`` is used when that directory exists;
    otherwise the adapter is downloaded from the Hugging Face Hub.
    """
    # all the lora_B weights are initialized to zero.
    repo_id = "jeeejeee/baichuan7b-zero-init"
    local_dir = os.path.join(models_path_prefix, repo_id)
    if os.path.isdir(local_dir):
        return local_dir
    return snapshot_download(repo_id=repo_id)
@pytest.fixture(scope="session")
def tinyllama_lora_files():
    """Return the directory of the ``jashing/tinyllama-colorist-lora`` adapter.

    The copy under ``models_path_prefix`` is used when that directory exists;
    otherwise the adapter is downloaded from the Hugging Face Hub.
    """
    repo_id = "jashing/tinyllama-colorist-lora"
    local_dir = os.path.join(models_path_prefix, repo_id)
    if os.path.isdir(local_dir):
        return local_dir
    return snapshot_download(repo_id=repo_id)
@pytest.fixture(scope="session")
def phi2_lora_files():
    """Return the directory of the ``isotr0py/phi-2-test-sql-lora`` adapter.

    The copy under ``models_path_prefix`` is used when that directory exists;
    otherwise the adapter is downloaded from the Hugging Face Hub.
    """
    repo_id = "isotr0py/phi-2-test-sql-lora"
    local_dir = os.path.join(models_path_prefix, repo_id)
    if os.path.isdir(local_dir):
        return local_dir
    return snapshot_download(repo_id=repo_id)
@pytest.fixture(scope="session")
def long_context_lora_files_16k_1():
    """Return the directory of ``SangBinCho/long_context_16k_testing_1``.

    The copy under ``models_path_prefix`` is used when that directory exists;
    otherwise the adapter is downloaded from the Hugging Face Hub.
    """
    repo_id = "SangBinCho/long_context_16k_testing_1"
    local_dir = os.path.join(models_path_prefix, repo_id)
    if os.path.isdir(local_dir):
        return local_dir
    return snapshot_download(repo_id=repo_id)
@pytest.fixture(scope="session")
def long_context_lora_files_16k_2():
    """Return the directory of ``SangBinCho/long_context_16k_testing_2``.

    The copy under ``models_path_prefix`` is used when that directory exists;
    otherwise the adapter is downloaded from the Hugging Face Hub.
    """
    repo_id = "SangBinCho/long_context_16k_testing_2"
    local_dir = os.path.join(models_path_prefix, repo_id)
    if os.path.isdir(local_dir):
        return local_dir
    return snapshot_download(repo_id=repo_id)
@pytest.fixture(scope="session")
def long_context_lora_files_32k():
    """Return the directory of ``SangBinCho/long_context_32k_testing``.

    The copy under ``models_path_prefix`` is used when that directory exists;
    otherwise the adapter is downloaded from the Hugging Face Hub.
    """
    repo_id = "SangBinCho/long_context_32k_testing"
    local_dir = os.path.join(models_path_prefix, repo_id)
    if os.path.isdir(local_dir):
        return local_dir
    return snapshot_download(repo_id=repo_id)
@pytest.fixture(scope="session")
......@@ -254,7 +266,7 @@ def llama_2_7b_engine_extra_embeddings():
**kwargs)
with patch("vllm.worker.model_runner.get_model", get_model_patched):
engine = vllm.LLM("meta-llama/Llama-2-7b-hf", enable_lora=False)
engine = vllm.LLM(os.path.join(models_path_prefix, "meta-llama/Llama-2-7b-hf"), enable_lora=False)
yield engine.llm_engine
del engine
cleanup()
......
from typing import List
import pytest
import os
import vllm
from vllm.lora.request import LoRARequest
from .conftest import cleanup
from ..utils import models_path_prefix
MODEL_PATH = "baichuan-inc/Baichuan-7B"
MODEL_PATH = os.path.join(models_path_prefix, "baichuan-inc/Baichuan-7B")
PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\n"\n##Instruction:\nconcert_singer contains tables such as stadium, singer, concert, singer_in_concert. Table stadium has columns such as Stadium_ID, Location, Name, Capacity, Highest, Lowest, Average. Stadium_ID is the primary key.\nTable singer has columns such as Singer_ID, Name, Country, Song_Name, Song_release_year, Age, Is_male. Singer_ID is the primary key.\nTable concert has columns such as concert_ID, concert_Name, Theme, Stadium_ID, Year. concert_ID is the primary key.\nTable singer_in_concert has columns such as concert_ID, Singer_ID. concert_ID is the primary key.\nThe Stadium_ID of concert is the foreign key of Stadium_ID of stadium.\nThe Singer_ID of singer_in_concert is the foreign key of Singer_ID of singer.\nThe concert_ID of singer_in_concert is the foreign key of concert_ID of concert.\n\n###Input:\n{query}\n\n###Response:""" # noqa: E501
......
from typing import List
import os
import vllm
from vllm.lora.request import LoRARequest
from ..utils import models_path_prefix
MODEL_PATH = "THUDM/chatglm3-6b"
MODEL_PATH = os.path.join(models_path_prefix, "THUDM/chatglm3-6b")
PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\n"\n##Instruction:\nconcert_singer contains tables such as stadium, singer, concert, singer_in_concert. Table stadium has columns such as Stadium_ID, Location, Name, Capacity, Highest, Lowest, Average. Stadium_ID is the primary key.\nTable singer has columns such as Singer_ID, Name, Country, Song_Name, Song_release_year, Age, Is_male. Singer_ID is the primary key.\nTable concert has columns such as concert_ID, concert_Name, Theme, Stadium_ID, Year. concert_ID is the primary key.\nTable singer_in_concert has columns such as concert_ID, Singer_ID. concert_ID is the primary key.\nThe Stadium_ID of concert is the foreign key of Stadium_ID of stadium.\nThe Singer_ID of singer_in_concert is the foreign key of Singer_ID of singer.\nThe concert_ID of singer_in_concert is the foreign key of concert_ID of concert.\n\n###Input:\n{query}\n\n###Response:""" # noqa: E501
......
from typing import List
import pytest
import os
import vllm
from vllm.lora.request import LoRARequest
from vllm.utils import is_hip
from ..utils import models_path_prefix
MODEL_PATH = "google/gemma-7b"
MODEL_PATH = os.path.join(models_path_prefix, "google/gemma-7b")
def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
......
from typing import List
import pytest
import os
import ray
import vllm
from vllm.lora.request import LoRARequest
from .conftest import cleanup
from ..utils import models_path_prefix
MODEL_PATH = "meta-llama/Llama-2-7b-hf"
MODEL_PATH = os.path.join(models_path_prefix, "meta-llama/Llama-2-7b-hf")
def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
......
......@@ -3,6 +3,7 @@ from typing import List, Optional, Tuple
import numpy as np
import pytest
import os
import vllm
from vllm import SamplingParams
......@@ -12,6 +13,7 @@ from vllm.model_executor.layers.rotary_embedding import (
LinearScalingRotaryEmbedding)
from .data.long_context_test_data import prompts_and_responses
from ..utils import models_path_prefix
context_len_to_scaling_factor = {
"16k": 4,
......@@ -108,7 +110,7 @@ def lora_llm(long_context_infos):
for info in long_context_infos.values()
]
llm = vllm.LLM("meta-llama/Llama-2-13b-chat-hf",
llm = vllm.LLM(os.path.join(models_path_prefix, "meta-llama/Llama-2-13b-chat-hf"),
enable_lora=True,
max_num_seqs=16,
max_loras=8,
......@@ -124,7 +126,7 @@ def test_rotary_emb_replaced(dist_init):
"""Verify rotary emb in all the layers are replaced"""
from vllm.engine.arg_utils import EngineArgs
from vllm.worker.model_runner import ModelRunner
# Keep the 7B base model this test used before the path prefix was added;
# only the lookup location changes, not the model under test.
engine_args = EngineArgs(
    os.path.join(models_path_prefix, "meta-llama/Llama-2-7b-hf"),
    long_lora_scaling_factors=(4.0, ),
    enable_lora=True)
engine_config = engine_args.create_engine_config()
......
......@@ -2,11 +2,13 @@ from typing import List
import pytest
import torch
import os
import vllm
from vllm.lora.request import LoRARequest
from ..utils import models_path_prefix
MODEL_PATH = "mistralai/Mixtral-8x7B-Instruct-v0.1"
MODEL_PATH = os.path.join(models_path_prefix, "mistralai/Mixtral-8x7B-Instruct-v0.1")
def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
......
from typing import List
import os
import vllm
from vllm.lora.request import LoRARequest
from ..utils import models_path_prefix
MODEL_PATH = "microsoft/phi-2"
MODEL_PATH = os.path.join(models_path_prefix, "microsoft/phi-2")
PROMPT_TEMPLATE = "### Instruct: {sql_prompt}\n\n### Context: {context}\n\n### Output:" # noqa: E501
......
......@@ -4,12 +4,14 @@ from dataclasses import dataclass
from typing import List
import pytest
import os
import vllm
from vllm.lora.request import LoRARequest
from vllm.utils import is_hip
from .conftest import cleanup
from ..utils import models_path_prefix
@dataclass
......@@ -23,16 +25,16 @@ MODELS: List[ModelWithQuantization]
if is_hip():
MODELS = [
ModelWithQuantization(
model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ",
model_path=os.path.join(models_path_prefix, "TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ"),
quantization="GPTQ"),
]
else:
MODELS = [
ModelWithQuantization(
model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ",
model_path=os.path.join(models_path_prefix, "TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ"),
quantization="AWQ"),
ModelWithQuantization(
model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ",
model_path=os.path.join(models_path_prefix, "TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ"),
quantization="GPTQ"),
]
......
......@@ -8,14 +8,15 @@ from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
from vllm.lora.models import LoRAMapping
from vllm.lora.request import LoRARequest
from vllm.worker.worker import Worker
from ..utils import models_path_prefix
@patch.dict(os.environ, {"RANK": "0"})
def test_worker_apply_lora(sql_lora_files):
worker = Worker(
model_config=ModelConfig(
"meta-llama/Llama-2-7b-hf",
"meta-llama/Llama-2-7b-hf",
os.path.join(models_path_prefix, "meta-llama/Llama-2-7b-hf"),
os.path.join(models_path_prefix, "meta-llama/Llama-2-7b-hf"),
tokenizer_mode="auto",
trust_remote_code=False,
seed=0,
......
import os
import time
from typing import List
......@@ -13,9 +14,10 @@ from vllm.sampling_params import SamplingParams
import vllm.envs as envs
from ..conftest import cleanup
from ..utils import models_path_prefix
MODELS = [
"facebook/opt-125m",
os.path.join(models_path_prefix, "facebook/opt-125m"),
]
......
......@@ -7,6 +7,7 @@ from huggingface_hub.utils import LocalEntryNotFoundError
from vllm.model_executor.model_loader.weight_utils import (
download_weights_from_hf, enable_hf_transfer)
from ..utils import models_path_prefix
def test_hf_transfer_auto_activation():
......@@ -31,20 +32,20 @@ def test_download_weights_from_hf():
# if offline is set and model is not cached
huggingface_hub.constants.HF_HUB_OFFLINE = True
with pytest.raises(LocalEntryNotFoundError):
download_weights_from_hf("facebook/opt-125m",
download_weights_from_hf(os.path.join(models_path_prefix, "facebook/opt-125m"),
allow_patterns=["*.safetensors", "*.bin"],
cache_dir=tmpdir)
# download the model
huggingface_hub.constants.HF_HUB_OFFLINE = False
download_weights_from_hf("facebook/opt-125m",
download_weights_from_hf(os.path.join(models_path_prefix, "facebook/opt-125m"),
allow_patterns=["*.safetensors", "*.bin"],
cache_dir=tmpdir)
# now it should work offline
huggingface_hub.constants.HF_HUB_OFFLINE = True
assert download_weights_from_hf(
"facebook/opt-125m",
os.path.join(models_path_prefix, "facebook/opt-125m"),
allow_patterns=["*.safetensors", "*.bin"],
cache_dir=tmpdir) is not None
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment