Commit 3c9817d2 authored by zhuwenwen
Browse files

add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from local path...

Add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from a local path instead of the Hugging Face Hub
parent 49204f68
import subprocess import subprocess
import sys import sys
import os
import tempfile import tempfile
from vllm.entrypoints.openai.protocol import BatchRequestOutput from vllm.entrypoints.openai.protocol import BatchRequestOutput
from ...utils import models_path_prefix
# ruff: noqa: E501 # ruff: noqa: E501
INPUT_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}} INPUT_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}} {"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NonExistModel", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}} {"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NonExistModel", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-4", "method": "POST", "url": "/bad_url", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}} {"custom_id": "request-4", "method": "POST", "url": "/bad_url", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-5", "method": "POST", "url": "/v1/chat/completions", "body": {"stream": "True", "model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}""" {"custom_id": "request-5", "method": "POST", "url": "/v1/chat/completions", "body": {"stream": "True", "model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
INVALID_INPUT_BATCH = """{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}} INVALID_INPUT_BATCH = """{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}""" {"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
INPUT_EMBEDDING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}} INPUT_EMBEDDING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/embeddings", "body": {"model": os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"), "input": "You are a helpful assistant."}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are an unhelpful assistant."}} {"custom_id": "request-2", "method": "POST", "url": "/v1/embeddings", "body": {"model": os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"), "input": "You are an unhelpful assistant."}}
{"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "Hello world!"}} {"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"), "input": "Hello world!"}}
{"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}""" {"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}"""
...@@ -31,7 +33,7 @@ def test_empty_file(): ...@@ -31,7 +33,7 @@ def test_empty_file():
proc = subprocess.Popen([ proc = subprocess.Popen([
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i", sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i",
input_file.name, "-o", output_file.name, "--model", input_file.name, "-o", output_file.name, "--model",
"intfloat/e5-mistral-7b-instruct" os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct")
], ) ], )
proc.communicate() proc.communicate()
proc.wait() proc.wait()
...@@ -50,7 +52,7 @@ def test_completions(): ...@@ -50,7 +52,7 @@ def test_completions():
proc = subprocess.Popen([ proc = subprocess.Popen([
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i", sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i",
input_file.name, "-o", output_file.name, "--model", input_file.name, "-o", output_file.name, "--model",
"NousResearch/Meta-Llama-3-8B-Instruct" os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct")
], ) ], )
proc.communicate() proc.communicate()
proc.wait() proc.wait()
...@@ -75,7 +77,7 @@ def test_completions_invalid_input(): ...@@ -75,7 +77,7 @@ def test_completions_invalid_input():
proc = subprocess.Popen([ proc = subprocess.Popen([
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i", sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i",
input_file.name, "-o", output_file.name, "--model", input_file.name, "-o", output_file.name, "--model",
"NousResearch/Meta-Llama-3-8B-Instruct" os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct")
], ) ], )
proc.communicate() proc.communicate()
proc.wait() proc.wait()
...@@ -91,7 +93,7 @@ def test_embeddings(): ...@@ -91,7 +93,7 @@ def test_embeddings():
proc = subprocess.Popen([ proc = subprocess.Popen([
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i", sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i",
input_file.name, "-o", output_file.name, "--model", input_file.name, "-o", output_file.name, "--model",
"intfloat/e5-mistral-7b-instruct" os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct")
], ) ], )
proc.communicate() proc.communicate()
proc.wait() proc.wait()
......
...@@ -9,8 +9,9 @@ from vllm.entrypoints.openai.protocol import ChatCompletionRequest ...@@ -9,8 +9,9 @@ from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_engine import BaseModelPath from vllm.entrypoints.openai.serving_engine import BaseModelPath
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.transformers_utils.tokenizer import get_tokenizer
from ...utils import models_path_prefix
MODEL_NAME = "openai-community/gpt2" MODEL_NAME = os.path.join(models_path_prefix, "openai-community/gpt2")
CHAT_TEMPLATE = "Dummy chat template for testing {}" CHAT_TEMPLATE = "Dummy chat template for testing {}"
BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)] BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
......
...@@ -2,6 +2,7 @@ from http import HTTPStatus ...@@ -2,6 +2,7 @@ from http import HTTPStatus
from unittest.mock import MagicMock from unittest.mock import MagicMock
import pytest import pytest
import os
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
...@@ -9,8 +10,9 @@ from vllm.entrypoints.openai.protocol import (ErrorResponse, ...@@ -9,8 +10,9 @@ from vllm.entrypoints.openai.protocol import (ErrorResponse,
LoadLoraAdapterRequest, LoadLoraAdapterRequest,
UnloadLoraAdapterRequest) UnloadLoraAdapterRequest)
from vllm.entrypoints.openai.serving_engine import BaseModelPath, OpenAIServing from vllm.entrypoints.openai.serving_engine import BaseModelPath, OpenAIServing
from ...utils import models_path_prefix
MODEL_NAME = "meta-llama/Llama-2-7b" MODEL_NAME = os.path.join(models_path_prefix, "meta-llama/Llama-2-7b")
BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)] BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
LORA_LOADING_SUCCESS_MESSAGE = ( LORA_LOADING_SUCCESS_MESSAGE = (
"Success: LoRA adapter '{lora_name}' added successfully.") "Success: LoRA adapter '{lora_name}' added successfully.")
......
...@@ -4,9 +4,9 @@ import os ...@@ -4,9 +4,9 @@ import os
import openai import openai
import pytest import pytest
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
@pytest.mark.asyncio @pytest.mark.asyncio
......
import openai # use the official client for correctness check import openai # use the official client for correctness check
import pytest import pytest
import os
import pytest_asyncio import pytest_asyncio
import requests import requests
...@@ -8,9 +9,10 @@ from vllm.transformers_utils.tokenizer import get_tokenizer ...@@ -8,9 +9,10 @@ from vllm.transformers_utils.tokenizer import get_tokenizer
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
from .test_completion import zephyr_lora_added_tokens_files # noqa: F401 from .test_completion import zephyr_lora_added_tokens_files # noqa: F401
from .test_completion import zephyr_lora_files # noqa: F401 from .test_completion import zephyr_lora_files # noqa: F401
from ...utils import models_path_prefix
# any model with a chat template should work here # any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
...@@ -2,13 +2,14 @@ from typing import Dict, List ...@@ -2,13 +2,14 @@ from typing import Dict, List
import openai import openai
import pytest import pytest
import os
import pytest_asyncio import pytest_asyncio
from vllm.multimodal.utils import encode_image_base64, fetch_image from vllm.multimodal.utils import encode_image_base64, fetch_image
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = "microsoft/Phi-3.5-vision-instruct" MODEL_NAME = os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct")
MAXIMUM_IMAGES = 2 MAXIMUM_IMAGES = 2
# Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA) # Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
......
...@@ -2,6 +2,7 @@ import warnings ...@@ -2,6 +2,7 @@ import warnings
from typing import Optional from typing import Optional
import pytest import pytest
import os
from PIL import Image from PIL import Image
from vllm.assets.image import ImageAsset from vllm.assets.image import ImageAsset
...@@ -11,8 +12,9 @@ from vllm.entrypoints.chat_utils import (parse_chat_messages, ...@@ -11,8 +12,9 @@ from vllm.entrypoints.chat_utils import (parse_chat_messages,
from vllm.multimodal import MultiModalDataDict from vllm.multimodal import MultiModalDataDict
from vllm.multimodal.utils import encode_image_base64 from vllm.multimodal.utils import encode_image_base64
from vllm.transformers_utils.tokenizer_group import TokenizerGroup from vllm.transformers_utils.tokenizer_group import TokenizerGroup
from ..utils import models_path_prefix
PHI3V_MODEL_ID = "microsoft/Phi-3.5-vision-instruct" PHI3V_MODEL_ID = os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
...@@ -2,14 +2,17 @@ from pathlib import Path ...@@ -2,14 +2,17 @@ from pathlib import Path
from typing import List from typing import List
import pytest import pytest
import os
import torch import torch
from gguf import GGMLQuantizationType, GGUFReader, ReaderTensor, dequantize from gguf import GGMLQuantizationType, GGUFReader, ReaderTensor, dequantize
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
import vllm._custom_ops as ops import vllm._custom_ops as ops
from vllm.utils import seed_everything from vllm.utils import seed_everything
from ..utils import models_path_prefix
GGUF_SAMPLE = snapshot_download("Isotr0py/test-gguf-sample") # GGUF_SAMPLE = snapshot_download("Isotr0py/test-gguf-sample")
GGUF_SAMPLE = os.path.join(models_path_prefix, "Isotr0py/test-gguf-sample")
def get_gguf_sample_tensors( def get_gguf_sample_tensors(
......
...@@ -6,6 +6,7 @@ from typing import Dict, List, TypedDict ...@@ -6,6 +6,7 @@ from typing import Dict, List, TypedDict
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
import pytest import pytest
import os
import ray import ray
import torch import torch
import torch.nn as nn import torch.nn as nn
...@@ -24,6 +25,7 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor ...@@ -24,6 +25,7 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.sampler import Sampler
from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
from vllm.model_executor.model_loader import get_model from vllm.model_executor.model_loader import get_model
from utils import models_path_prefix
class ContextIDInfo(TypedDict): class ContextIDInfo(TypedDict):
...@@ -158,7 +160,7 @@ def dummy_model_gate_up() -> nn.Module: ...@@ -158,7 +160,7 @@ def dummy_model_gate_up() -> nn.Module:
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def sql_lora_huggingface_id(): def sql_lora_huggingface_id():
# huggingface repo id is used to test lora runtime downloading. # huggingface repo id is used to test lora runtime downloading.
return "yard1/llama-2-7b-sql-lora-test" return os.path.join(models_path_prefix, "yard1/llama-2-7b-sql-lora-test")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
...@@ -170,53 +172,63 @@ def sql_lora_files(sql_lora_huggingface_id): ...@@ -170,53 +172,63 @@ def sql_lora_files(sql_lora_huggingface_id):
def mixtral_lora_files(): def mixtral_lora_files():
# Note: this module has incorrect adapter_config.json to test # Note: this module has incorrect adapter_config.json to test
# https://github.com/vllm-project/vllm/pull/5909/files. # https://github.com/vllm-project/vllm/pull/5909/files.
return snapshot_download(repo_id="SangBinCho/mixtral-lora") # return snapshot_download(repo_id="SangBinCho/mixtral-lora")
return os.path.join(models_path_prefix, "SangBinCho/mixtral-lora")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def gemma_lora_files(): def gemma_lora_files():
return snapshot_download(repo_id="wskwon/gemma-7b-test-lora") # return snapshot_download(repo_id="wskwon/gemma-7b-test-lora")
return os.path.join(models_path_prefix, "wskwon/gemma-7b-test-lora")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def chatglm3_lora_files(): def chatglm3_lora_files():
return snapshot_download(repo_id="jeeejeee/chatglm3-text2sql-spider") # return snapshot_download(repo_id="jeeejeee/chatglm3-text2sql-spider")
return os.path.join(models_path_prefix, "jeeejeee/chatglm3-text2sql-spider")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def baichuan_lora_files(): def baichuan_lora_files():
return snapshot_download(repo_id="jeeejeee/baichuan7b-text2sql-spider") # return snapshot_download(repo_id="jeeejeee/baichuan7b-text2sql-spider")
return os.path.join(models_path_prefix, "jeeejeee/baichuan7b-text2sql-spider")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def baichuan_zero_lora_files(): def baichuan_zero_lora_files():
# all the lora_B weights are initialized to zero. # all the lora_B weights are initialized to zero.
return snapshot_download(repo_id="jeeejeee/baichuan7b-zero-init") # return snapshot_download(repo_id="jeeejeee/baichuan7b-zero-init")
return os.path.join(models_path_prefix, "jeeejeee/baichuan7b-zero-init")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def tinyllama_lora_files(): def tinyllama_lora_files():
return snapshot_download(repo_id="jashing/tinyllama-colorist-lora") # return snapshot_download(repo_id="jashing/tinyllama-colorist-lora")
return os.path.join(models_path_prefix, "jashing/tinyllama-colorist-lora")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def phi2_lora_files(): def phi2_lora_files():
return snapshot_download(repo_id="isotr0py/phi-2-test-sql-lora") # return snapshot_download(repo_id="isotr0py/phi-2-test-sql-lora")
return os.path.join(models_path_prefix, "isotr0py/phi-2-test-sql-lora")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def long_context_lora_files_16k_1(): def long_context_lora_files_16k_1():
return snapshot_download(repo_id="SangBinCho/long_context_16k_testing_1") # return snapshot_download(repo_id="SangBinCho/long_context_16k_testing_1")
return os.path.join(models_path_prefix, "SangBinCho/long_context_16k_testing_1")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def long_context_lora_files_16k_2(): def long_context_lora_files_16k_2():
return snapshot_download(repo_id="SangBinCho/long_context_16k_testing_2") # return snapshot_download(repo_id="SangBinCho/long_context_16k_testing_2")
return os.path.join(models_path_prefix, "SangBinCho/long_context_16k_testing_2")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def long_context_lora_files_32k(): def long_context_lora_files_32k():
return snapshot_download(repo_id="SangBinCho/long_context_32k_testing") # return snapshot_download(repo_id="SangBinCho/long_context_32k_testing")
return os.path.join(models_path_prefix, "SangBinCho/long_context_32k_testing")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
...@@ -254,7 +266,7 @@ def llama_2_7b_engine_extra_embeddings(): ...@@ -254,7 +266,7 @@ def llama_2_7b_engine_extra_embeddings():
**kwargs) **kwargs)
with patch("vllm.worker.model_runner.get_model", get_model_patched): with patch("vllm.worker.model_runner.get_model", get_model_patched):
engine = vllm.LLM("meta-llama/Llama-2-7b-hf", enable_lora=False) engine = vllm.LLM(os.path.join(models_path_prefix, "meta-llama/Llama-2-7b-hf"), enable_lora=False)
yield engine.llm_engine yield engine.llm_engine
del engine del engine
cleanup() cleanup()
......
from typing import List from typing import List
import pytest import pytest
import os
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from .conftest import cleanup from .conftest import cleanup
from ..utils import models_path_prefix
MODEL_PATH = "baichuan-inc/Baichuan-7B" MODEL_PATH = os.path.join(models_path_prefix, "baichuan-inc/Baichuan-7B")
PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\n"\n##Instruction:\nconcert_singer contains tables such as stadium, singer, concert, singer_in_concert. Table stadium has columns such as Stadium_ID, Location, Name, Capacity, Highest, Lowest, Average. Stadium_ID is the primary key.\nTable singer has columns such as Singer_ID, Name, Country, Song_Name, Song_release_year, Age, Is_male. Singer_ID is the primary key.\nTable concert has columns such as concert_ID, concert_Name, Theme, Stadium_ID, Year. concert_ID is the primary key.\nTable singer_in_concert has columns such as concert_ID, Singer_ID. concert_ID is the primary key.\nThe Stadium_ID of concert is the foreign key of Stadium_ID of stadium.\nThe Singer_ID of singer_in_concert is the foreign key of Singer_ID of singer.\nThe concert_ID of singer_in_concert is the foreign key of concert_ID of concert.\n\n###Input:\n{query}\n\n###Response:""" # noqa: E501 PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\n"\n##Instruction:\nconcert_singer contains tables such as stadium, singer, concert, singer_in_concert. Table stadium has columns such as Stadium_ID, Location, Name, Capacity, Highest, Lowest, Average. Stadium_ID is the primary key.\nTable singer has columns such as Singer_ID, Name, Country, Song_Name, Song_release_year, Age, Is_male. Singer_ID is the primary key.\nTable concert has columns such as concert_ID, concert_Name, Theme, Stadium_ID, Year. concert_ID is the primary key.\nTable singer_in_concert has columns such as concert_ID, Singer_ID. 
concert_ID is the primary key.\nThe Stadium_ID of concert is the foreign key of Stadium_ID of stadium.\nThe Singer_ID of singer_in_concert is the foreign key of Singer_ID of singer.\nThe concert_ID of singer_in_concert is the foreign key of concert_ID of concert.\n\n###Input:\n{query}\n\n###Response:""" # noqa: E501
......
from typing import List from typing import List
import os
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from ..utils import models_path_prefix
MODEL_PATH = "THUDM/chatglm3-6b" MODEL_PATH = os.path.join(models_path_prefix, "THUDM/chatglm3-6b")
PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\n"\n##Instruction:\nconcert_singer contains tables such as stadium, singer, concert, singer_in_concert. Table stadium has columns such as Stadium_ID, Location, Name, Capacity, Highest, Lowest, Average. Stadium_ID is the primary key.\nTable singer has columns such as Singer_ID, Name, Country, Song_Name, Song_release_year, Age, Is_male. Singer_ID is the primary key.\nTable concert has columns such as concert_ID, concert_Name, Theme, Stadium_ID, Year. concert_ID is the primary key.\nTable singer_in_concert has columns such as concert_ID, Singer_ID. concert_ID is the primary key.\nThe Stadium_ID of concert is the foreign key of Stadium_ID of stadium.\nThe Singer_ID of singer_in_concert is the foreign key of Singer_ID of singer.\nThe concert_ID of singer_in_concert is the foreign key of concert_ID of concert.\n\n###Input:\n{query}\n\n###Response:""" # noqa: E501 PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\n"\n##Instruction:\nconcert_singer contains tables such as stadium, singer, concert, singer_in_concert. Table stadium has columns such as Stadium_ID, Location, Name, Capacity, Highest, Lowest, Average. Stadium_ID is the primary key.\nTable singer has columns such as Singer_ID, Name, Country, Song_Name, Song_release_year, Age, Is_male. Singer_ID is the primary key.\nTable concert has columns such as concert_ID, concert_Name, Theme, Stadium_ID, Year. concert_ID is the primary key.\nTable singer_in_concert has columns such as concert_ID, Singer_ID. 
concert_ID is the primary key.\nThe Stadium_ID of concert is the foreign key of Stadium_ID of stadium.\nThe Singer_ID of singer_in_concert is the foreign key of Singer_ID of singer.\nThe concert_ID of singer_in_concert is the foreign key of concert_ID of concert.\n\n###Input:\n{query}\n\n###Response:""" # noqa: E501
......
from typing import List from typing import List
import pytest import pytest
import os
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.utils import is_hip from vllm.utils import is_hip
from ..utils import models_path_prefix
MODEL_PATH = "google/gemma-7b" MODEL_PATH = os.path.join(models_path_prefix, "google/gemma-7b")
def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]: def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
......
from typing import List from typing import List
import pytest import pytest
import os
import ray import ray
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from .conftest import cleanup from .conftest import cleanup
from ..utils import models_path_prefix
MODEL_PATH = "meta-llama/Llama-2-7b-hf" MODEL_PATH = os.path.join(models_path_prefix, "meta-llama/Llama-2-7b-hf")
def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]: def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
......
...@@ -3,6 +3,7 @@ from typing import List, Optional, Tuple ...@@ -3,6 +3,7 @@ from typing import List, Optional, Tuple
import numpy as np import numpy as np
import pytest import pytest
import os
import vllm import vllm
from vllm import SamplingParams from vllm import SamplingParams
...@@ -12,6 +13,7 @@ from vllm.model_executor.layers.rotary_embedding import ( ...@@ -12,6 +13,7 @@ from vllm.model_executor.layers.rotary_embedding import (
LinearScalingRotaryEmbedding) LinearScalingRotaryEmbedding)
from .data.long_context_test_data import prompts_and_responses from .data.long_context_test_data import prompts_and_responses
from ..utils import models_path_prefix
context_len_to_scaling_factor = { context_len_to_scaling_factor = {
"16k": 4, "16k": 4,
...@@ -108,7 +110,7 @@ def lora_llm(long_context_infos): ...@@ -108,7 +110,7 @@ def lora_llm(long_context_infos):
for info in long_context_infos.values() for info in long_context_infos.values()
] ]
llm = vllm.LLM("meta-llama/Llama-2-13b-chat-hf", llm = vllm.LLM(os.path.join(models_path_prefix, "meta-llama/Llama-2-13b-chat-hf"),
enable_lora=True, enable_lora=True,
max_num_seqs=16, max_num_seqs=16,
max_loras=8, max_loras=8,
...@@ -124,7 +126,7 @@ def test_rotary_emb_replaced(dist_init): ...@@ -124,7 +126,7 @@ def test_rotary_emb_replaced(dist_init):
"""Verify rotary emb in all the layers are replaced""" """Verify rotary emb in all the layers are replaced"""
from vllm.engine.arg_utils import EngineArgs from vllm.engine.arg_utils import EngineArgs
from vllm.worker.model_runner import ModelRunner from vllm.worker.model_runner import ModelRunner
engine_args = EngineArgs("meta-llama/Llama-2-7b-hf", engine_args = EngineArgs(os.path.join(models_path_prefix, "meta-llama/Llama-2-13b-chat-hf"),
long_lora_scaling_factors=(4.0, ), long_lora_scaling_factors=(4.0, ),
enable_lora=True) enable_lora=True)
engine_config = engine_args.create_engine_config() engine_config = engine_args.create_engine_config()
......
...@@ -2,11 +2,13 @@ from typing import List ...@@ -2,11 +2,13 @@ from typing import List
import pytest import pytest
import torch import torch
import os
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from ..utils import models_path_prefix
MODEL_PATH = "mistralai/Mixtral-8x7B-Instruct-v0.1" MODEL_PATH = os.path.join(models_path_prefix, "mistralai/Mixtral-8x7B-Instruct-v0.1")
def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]: def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
......
from typing import List from typing import List
import os
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from ..utils import models_path_prefix
MODEL_PATH = "microsoft/phi-2" MODEL_PATH = os.path.join(models_path_prefix, "microsoft/phi-2")
PROMPT_TEMPLATE = "### Instruct: {sql_prompt}\n\n### Context: {context}\n\n### Output:" # noqa: E501 PROMPT_TEMPLATE = "### Instruct: {sql_prompt}\n\n### Context: {context}\n\n### Output:" # noqa: E501
......
...@@ -4,12 +4,14 @@ from dataclasses import dataclass ...@@ -4,12 +4,14 @@ from dataclasses import dataclass
from typing import List from typing import List
import pytest import pytest
import os
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.utils import is_hip from vllm.utils import is_hip
from .conftest import cleanup from .conftest import cleanup
from ..utils import models_path_prefix
@dataclass @dataclass
...@@ -23,16 +25,16 @@ MODELS: List[ModelWithQuantization] ...@@ -23,16 +25,16 @@ MODELS: List[ModelWithQuantization]
if is_hip(): if is_hip():
MODELS = [ MODELS = [
ModelWithQuantization( ModelWithQuantization(
model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ", model_path=os.path.join(models_path_prefix, "TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ"),
quantization="GPTQ"), quantization="GPTQ"),
] ]
else: else:
MODELS = [ MODELS = [
ModelWithQuantization( ModelWithQuantization(
model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ", model_path=os.path.join(models_path_prefix, "TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ"),
quantization="AWQ"), quantization="AWQ"),
ModelWithQuantization( ModelWithQuantization(
model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ", model_path=os.path.join(models_path_prefix, "TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ"),
quantization="GPTQ"), quantization="GPTQ"),
] ]
......
...@@ -8,14 +8,15 @@ from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig, ...@@ -8,14 +8,15 @@ from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
from vllm.lora.models import LoRAMapping from vllm.lora.models import LoRAMapping
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.worker.worker import Worker from vllm.worker.worker import Worker
from ..utils import models_path_prefix
@patch.dict(os.environ, {"RANK": "0"}) @patch.dict(os.environ, {"RANK": "0"})
def test_worker_apply_lora(sql_lora_files): def test_worker_apply_lora(sql_lora_files):
worker = Worker( worker = Worker(
model_config=ModelConfig( model_config=ModelConfig(
"meta-llama/Llama-2-7b-hf", os.path.join(models_path_prefix, "meta-llama/Llama-2-7b-hf"),
"meta-llama/Llama-2-7b-hf", os.path.join(models_path_prefix, "meta-llama/Llama-2-7b-hf"),
tokenizer_mode="auto", tokenizer_mode="auto",
trust_remote_code=False, trust_remote_code=False,
seed=0, seed=0,
......
import os
import time import time
from typing import List from typing import List
...@@ -13,9 +14,10 @@ from vllm.sampling_params import SamplingParams ...@@ -13,9 +14,10 @@ from vllm.sampling_params import SamplingParams
import vllm.envs as envs import vllm.envs as envs
from ..conftest import cleanup from ..conftest import cleanup
from ..utils import models_path_prefix
MODELS = [ MODELS = [
"facebook/opt-125m", os.path.join(models_path_prefix, "facebook/opt-125m"),
] ]
......
...@@ -7,6 +7,7 @@ from huggingface_hub.utils import LocalEntryNotFoundError ...@@ -7,6 +7,7 @@ from huggingface_hub.utils import LocalEntryNotFoundError
from vllm.model_executor.model_loader.weight_utils import ( from vllm.model_executor.model_loader.weight_utils import (
download_weights_from_hf, enable_hf_transfer) download_weights_from_hf, enable_hf_transfer)
from ..utils import models_path_prefix
def test_hf_transfer_auto_activation(): def test_hf_transfer_auto_activation():
...@@ -31,20 +32,20 @@ def test_download_weights_from_hf(): ...@@ -31,20 +32,20 @@ def test_download_weights_from_hf():
# if offline is set and model is not cached # if offline is set and model is not cached
huggingface_hub.constants.HF_HUB_OFFLINE = True huggingface_hub.constants.HF_HUB_OFFLINE = True
with pytest.raises(LocalEntryNotFoundError): with pytest.raises(LocalEntryNotFoundError):
download_weights_from_hf("facebook/opt-125m", download_weights_from_hf(os.path.join(models_path_prefix, "facebook/opt-125m"),
allow_patterns=["*.safetensors", "*.bin"], allow_patterns=["*.safetensors", "*.bin"],
cache_dir=tmpdir) cache_dir=tmpdir)
# download the model # download the model
huggingface_hub.constants.HF_HUB_OFFLINE = False huggingface_hub.constants.HF_HUB_OFFLINE = False
download_weights_from_hf("facebook/opt-125m", download_weights_from_hf(os.path.join(models_path_prefix, "facebook/opt-125m"),
allow_patterns=["*.safetensors", "*.bin"], allow_patterns=["*.safetensors", "*.bin"],
cache_dir=tmpdir) cache_dir=tmpdir)
# now it should work offline # now it should work offline
huggingface_hub.constants.HF_HUB_OFFLINE = True huggingface_hub.constants.HF_HUB_OFFLINE = True
assert download_weights_from_hf( assert download_weights_from_hf(
"facebook/opt-125m", os.path.join(models_path_prefix, "facebook/opt-125m"),
allow_patterns=["*.safetensors", "*.bin"], allow_patterns=["*.safetensors", "*.bin"],
cache_dir=tmpdir) is not None cache_dir=tmpdir) is not None
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment