Commit 705f6a35 authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.5.2' into v0.5.2-dtk24.04.1

parents af837396 4cf256ae
...@@ -5,18 +5,21 @@ Run `pytest tests/kernels/marlin/test_marlin_gemm.py`. ...@@ -5,18 +5,21 @@ Run `pytest tests/kernels/marlin/test_marlin_gemm.py`.
import pytest import pytest
import torch import torch
from tests.quantization.utils import is_quant_method_supported
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.model_executor.layers.quantization.gptq_marlin import (
GPTQ_MARLIN_MAX_PARALLEL, GPTQ_MARLIN_MIN_THREAD_N,
GPTQ_MARLIN_SUPPORTED_GROUP_SIZES, GPTQ_MARLIN_SUPPORTED_NUM_BITS)
from vllm.model_executor.layers.quantization.gptq_marlin_24 import ( from vllm.model_executor.layers.quantization.gptq_marlin_24 import (
GPTQ_MARLIN_24_MAX_PARALLEL, GPTQ_MARLIN_24_MIN_THREAD_N, GPTQ_MARLIN_24_MAX_PARALLEL, GPTQ_MARLIN_24_MIN_THREAD_N,
GPTQ_MARLIN_24_SUPPORTED_GROUP_SIZES, GPTQ_MARLIN_24_SUPPORTED_NUM_BITS) GPTQ_MARLIN_24_SUPPORTED_GROUP_SIZES, GPTQ_MARLIN_24_SUPPORTED_NUM_BITS)
from vllm.model_executor.layers.quantization.utils.marlin_perms import (
marlin_perm)
from vllm.model_executor.layers.quantization.utils.marlin_utils import ( from vllm.model_executor.layers.quantization.utils.marlin_utils import (
MarlinWorkspace, compute_max_diff, is_marlin_supported, marlin_24_quantize, GPTQ_MARLIN_MAX_PARALLEL, GPTQ_MARLIN_MIN_THREAD_N,
marlin_quantize, marlin_weights) GPTQ_MARLIN_SUPPORTED_GROUP_SIZES, GPTQ_MARLIN_SUPPORTED_NUM_BITS,
marlin_permute_scales)
from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
pack_fp8_to_int32)
from vllm.model_executor.layers.quantization.utils.marlin_utils_test import (
MarlinWorkspace, get_weight_perm, marlin_quantize, marlin_weights)
from vllm.model_executor.layers.quantization.utils.marlin_utils_test_24 import (
marlin_24_quantize)
from vllm.model_executor.layers.quantization.utils.quant_utils import ( from vllm.model_executor.layers.quantization.utils.quant_utils import (
gptq_pack, quantize_weights, sort_weights) gptq_pack, quantize_weights, sort_weights)
from vllm.utils import is_hip from vllm.utils import is_hip
...@@ -39,12 +42,19 @@ MNK_FACTORS = [ ...@@ -39,12 +42,19 @@ MNK_FACTORS = [
(67, 13, 11), (67, 13, 11),
] ]
DTYPES = [torch.float16, torch.bfloat16]
def compute_max_diff(output, output_ref):
    """Return the relative mean absolute error of `output` vs `output_ref`.

    Despite the name this is not a maximum: the result is
    mean(|output - output_ref|) / mean(|output_ref|), as a 0-dim tensor.
    """
    mean_abs_err = (output - output_ref).abs().mean()
    mean_abs_ref = output_ref.abs().mean()
    return mean_abs_err / mean_abs_ref
def rand_data(shape): def rand_data(shape, dtype=torch.float16):
return torch.randn(shape, dtype=torch.half, device="cuda") return torch.randn(shape, dtype=dtype, device="cuda")
@pytest.mark.skipif(not is_marlin_supported() or is_hip(), @pytest.mark.skipif(not is_quant_method_supported("gptq_marlin"),
reason="Marlin is not supported on this GPU type.") reason="Marlin is not supported on this GPU type.")
@pytest.mark.parametrize("k_chunk", MARLIN_K_CHUNKS) @pytest.mark.parametrize("k_chunk", MARLIN_K_CHUNKS)
@pytest.mark.parametrize("n_chunk", MARLIN_N_CHUNKS) @pytest.mark.parametrize("n_chunk", MARLIN_N_CHUNKS)
...@@ -91,8 +101,8 @@ def test_marlin_repack(k_chunk, n_chunk, num_bits, group_size, act_order, ...@@ -91,8 +101,8 @@ def test_marlin_repack(k_chunk, n_chunk, num_bits, group_size, act_order,
q_w, g_idx, sort_indices = sort_weights(q_w, g_idx) q_w, g_idx, sort_indices = sort_weights(q_w, g_idx)
# Pack to Marlin format # Pack to Marlin format
marlin_q_w_1 = marlin_weights(q_w, size_k, size_n, num_bits, weight_perm = get_weight_perm(num_bits)
marlin_perm[num_bits]) marlin_q_w_1 = marlin_weights(q_w, size_k, size_n, num_bits, weight_perm)
# Run Marlin repack GPU kernel # Run Marlin repack GPU kernel
marlin_q_w_2 = ops.gptq_marlin_repack( marlin_q_w_2 = ops.gptq_marlin_repack(
...@@ -107,7 +117,7 @@ def test_marlin_repack(k_chunk, n_chunk, num_bits, group_size, act_order, ...@@ -107,7 +117,7 @@ def test_marlin_repack(k_chunk, n_chunk, num_bits, group_size, act_order,
assert torch.allclose(marlin_q_w_1, marlin_q_w_2) assert torch.allclose(marlin_q_w_1, marlin_q_w_2)
@pytest.mark.skipif(not is_marlin_supported() or is_hip(), @pytest.mark.skipif(not is_quant_method_supported("gptq_marlin"),
reason="Marlin is not supported on this GPU type.") reason="Marlin is not supported on this GPU type.")
@pytest.mark.parametrize("k_chunk", MARLIN_K_CHUNKS) @pytest.mark.parametrize("k_chunk", MARLIN_K_CHUNKS)
@pytest.mark.parametrize("n_chunk", MARLIN_N_CHUNKS) @pytest.mark.parametrize("n_chunk", MARLIN_N_CHUNKS)
...@@ -172,7 +182,7 @@ def test_marlin_gemm( ...@@ -172,7 +182,7 @@ def test_marlin_gemm(
assert max_diff < 0.04 assert max_diff < 0.04
@pytest.mark.skipif(not is_marlin_supported() or is_hip(), @pytest.mark.skipif(not is_quant_method_supported("gptq_marlin"),
reason="Marlin is not supported on this GPU type.") reason="Marlin is not supported on this GPU type.")
@pytest.mark.parametrize("k_chunk", MARLIN_24_K_CHUNKS) @pytest.mark.parametrize("k_chunk", MARLIN_24_K_CHUNKS)
@pytest.mark.parametrize("n_chunk", MARLIN_24_N_CHUNKS) @pytest.mark.parametrize("n_chunk", MARLIN_24_N_CHUNKS)
...@@ -218,3 +228,77 @@ def test_marlin_24_gemm(k_chunk, n_chunk, num_bits, group_size, mnk_factors): ...@@ -218,3 +228,77 @@ def test_marlin_24_gemm(k_chunk, n_chunk, num_bits, group_size, mnk_factors):
print("max_diff = {}".format(max_diff)) print("max_diff = {}".format(max_diff))
assert max_diff < 0.04 assert max_diff < 0.04
@pytest.mark.skipif(not is_quant_method_supported("fp8"),
                    reason="Marlin is not supported on this GPU type.")
@pytest.mark.parametrize("k_chunk", MARLIN_K_CHUNKS)
@pytest.mark.parametrize("n_chunk", MARLIN_N_CHUNKS)
@pytest.mark.parametrize("num_bits", [8])
@pytest.mark.parametrize("group_size", [-1])
@pytest.mark.parametrize("mnk_factors", MNK_FACTORS)
@pytest.mark.parametrize("dtype", DTYPES)
def test_fp8_marlin_gemm(
    k_chunk,
    n_chunk,
    num_bits,
    group_size,
    mnk_factors,
    dtype,
):
    """Compare ops.fp8_marlin_gemm against a dense torch.matmul reference.

    The weight is quantized with ops.scaled_fp8_quant, packed to int32,
    repacked to Marlin layout, and multiplied by a random activation. The
    test passes when the relative mean error (see compute_max_diff) between
    the kernel output and the unquantized matmul is below 0.04.

    The parametrized `num_bits` and `group_size` are forwarded to every
    call that needs them, so the parametrize lists are the single source
    of truth for those values.
    """
    m_factor, n_factor, k_factor = mnk_factors

    size_m = m_factor
    size_k = k_chunk * k_factor
    size_n = n_chunk * n_factor

    print(f"MNK = {size_m} {size_n} {size_k}")
    print(f"groupsize = {group_size}")

    a_input = rand_data((size_m, size_k), dtype=dtype)
    b_weight = rand_data((size_k, size_n), dtype=dtype)

    # WEIGHTS
    fp8_weight, weight_scale = ops.scaled_fp8_quant(b_weight, scale=None)
    # Repack weights to gptq format (packed int32 elements)
    packed_gptq_qweight = pack_fp8_to_int32(fp8_weight)
    # Repack weights to marlin format (empty perm: no act-order permutation)
    marlin_qweight = ops.gptq_marlin_repack(
        b_q_weight=packed_gptq_qweight,
        perm=torch.empty(0, dtype=torch.int, device="cuda"),
        size_k=size_k,
        size_n=size_n,
        num_bits=num_bits,
    )

    # WEIGHT SCALES
    # Currently Marlin doesn't support per-tensor scales, so we
    # expand it to channelwise
    scales = weight_scale.repeat(1, size_n).to(a_input.dtype).to("cuda")
    # Permute scales
    marlin_scales = marlin_permute_scales(s=scales,
                                          size_k=size_k,
                                          size_n=size_n,
                                          group_size=group_size)

    workspace = MarlinWorkspace(size_n, GPTQ_MARLIN_MIN_THREAD_N,
                                GPTQ_MARLIN_MAX_PARALLEL)

    output = ops.fp8_marlin_gemm(
        a=a_input,
        b_q_weight=marlin_qweight,
        b_scales=marlin_scales,
        workspace=workspace.scratch,
        num_bits=num_bits,
        size_m=a_input.shape[0],
        size_n=b_weight.shape[1],
        size_k=a_input.shape[1],
    )
    output_ref = torch.matmul(a_input, b_weight)

    # Make sure the kernel has finished before comparing results
    torch.cuda.synchronize()

    max_diff = compute_max_diff(output, output_ref)
    print("max_diff = {}".format(max_diff))

    assert max_diff < 0.04
...@@ -29,7 +29,7 @@ def torch_moe(a, w1, w2, score, topk): ...@@ -29,7 +29,7 @@ def torch_moe(a, w1, w2, score, topk):
topk_weight.view(B, -1, 1).to(out.dtype)).sum(dim=1) topk_weight.view(B, -1, 1).to(out.dtype)).sum(dim=1)
@pytest.mark.parametrize("m", [512, 222, 33, 1]) @pytest.mark.parametrize("m", [1024 * 128, 512, 222, 33, 1])
@pytest.mark.parametrize("n", [2048, 256, 1024]) @pytest.mark.parametrize("n", [2048, 256, 1024])
@pytest.mark.parametrize("k", [128, 511, 1024]) @pytest.mark.parametrize("k", [128, 511, 1024])
@pytest.mark.parametrize("e", [8, 64]) @pytest.mark.parametrize("e", [8, 64])
...@@ -77,8 +77,8 @@ def test_mixtral_moe(dtype: torch.dtype): ...@@ -77,8 +77,8 @@ def test_mixtral_moe(dtype: torch.dtype):
for i in range(config.num_local_experts): for i in range(config.num_local_experts):
weights = (hf_moe.experts[i].w1.weight.data, weights = (hf_moe.experts[i].w1.weight.data,
hf_moe.experts[i].w3.weight.data) hf_moe.experts[i].w3.weight.data)
vllm_moe.w13_weight[i][:] = torch.cat(weights, dim=0) vllm_moe.experts.w13_weight[i][:] = torch.cat(weights, dim=0)
vllm_moe.w2_weight[i][:] = hf_moe.experts[i].w2.weight.data vllm_moe.experts.w2_weight[i][:] = hf_moe.experts[i].w2.weight.data
# Generate input batch of dimensions [batch_size, seq_len, hidden_dim] # Generate input batch of dimensions [batch_size, seq_len, hidden_dim]
hf_inputs = torch.randn((1, 64, config.hidden_size)).to(dtype).to("cuda") hf_inputs = torch.randn((1, 64, config.hidden_size)).to(dtype).to("cuda")
......
from itertools import accumulate, product from itertools import accumulate, product
from typing import List, Optional from typing import Dict, List, Optional
import pytest import pytest
import torch import torch
...@@ -126,7 +126,7 @@ def test_batched_rotary_embedding( ...@@ -126,7 +126,7 @@ def test_batched_rotary_embedding(
query, query,
key, key,
offsets=torch.zeros(batch_size * seq_len, offsets=torch.zeros(batch_size * seq_len,
dtype=int, dtype=torch.long,
device=device)) device=device))
# Compare the results. # Compare the results.
assert torch.allclose(out_query, assert torch.allclose(out_query,
...@@ -214,20 +214,16 @@ def test_batched_rotary_embedding_multi_lora( ...@@ -214,20 +214,16 @@ def test_batched_rotary_embedding_multi_lora(
def test_rope_module_cache(): def test_rope_module_cache():
MAX_POSITIONS = [123, 1234] MAX_POSITIONS = [123, 1234]
BASES = [10000, 1000000] BASES = [10000, 1000000]
ROPE_SCALINGS = [ ROPE_SCALINGS = (None, {
None, { "type": "linear",
"type": "linear", "factor": (1, )
"factor": (1, ) }, {
}, { "type": "dynamic",
"type": "dynamic", "factor": 1
"factor": 1 })
} settings = (HEAD_SIZES, ROTARY_DIMS, MAX_POSITIONS, BASES, IS_NEOX_STYLE,
] ROPE_SCALINGS, DTYPES)
settings = [ rope_setting_id_map: Dict[str, int] = {}
HEAD_SIZES, ROTARY_DIMS, MAX_POSITIONS, BASES, IS_NEOX_STYLE,
ROPE_SCALINGS, DTYPES
]
rope_setting_id_map = {}
for setting in product(*settings): for setting in product(*settings):
head_size, rotary_dim, max_position, base, \ head_size, rotary_dim, max_position, base, \
is_neox_stype, rope_scaling, dtype = setting is_neox_stype, rope_scaling, dtype = setting
......
"""Kernel test utils""" """Kernel test utils"""
import itertools
import random
from numbers import Number
from typing import Any, List, NamedTuple, Optional, Tuple, Union
import pytest import pytest
import torch
from vllm.attention.backends.abstract import (AttentionBackend,
AttentionMetadata, AttentionType)
from vllm.attention.backends.xformers import XFormersBackend
from vllm.utils import make_tensor_with_pad
# String name of register which may be set in order to
# force auto-selection of attention backend by Attention
# wrapper
STR_BACKEND_ENV_VAR: str = "VLLM_ATTENTION_BACKEND" STR_BACKEND_ENV_VAR: str = "VLLM_ATTENTION_BACKEND"
# Possible string values of STR_BACKEND_ENV_VAR
# register, corresponding to possible backends
STR_FLASHINFER_ATTN_VAL: str = "FLASHINFER"
STR_TORCH_SDPA_ATTN_VAL: str = "TORCH_SDPA"
STR_ROCM_FLASH_ATTN_VAL: str = "ROCM_FLASH"
STR_XFORMERS_ATTN_VAL: str = "XFORMERS"
STR_FLASH_ATTN_VAL: str = "FLASH_ATTN" STR_FLASH_ATTN_VAL: str = "FLASH_ATTN"
STR_INVALID_VAL: str = "INVALID" STR_INVALID_VAL: str = "INVALID"
class QKVInputs(NamedTuple):
    '''
    Data structure for representing unpacked attention inputs,
    query/key/values and their sequence lengths.

    Attributes:

    * {query,key,value}: unpacked (batch_size x padded_seq_len x
                         num_heads x head_size) attention inputs
    * q_seq_lens: query sequence lengths list
    * kv_seq_lens: shared key/value sequence lengths list
    '''

    # Padded query / key / value tensors
    query: torch.Tensor
    key: torch.Tensor
    value: torch.Tensor
    # Per-batch-element sequence lengths (one entry per batch index)
    q_seq_lens: List[int]
    kv_seq_lens: List[int]
class QKVO(NamedTuple):
    '''
    Data structure for representing unpacked attention inputs,
    alongside unpacked known-correct attention output

    Attributes:

    * qkv: unpacked (batch_size x padded_seq_len x
                     num_heads x head_size) attention inputs
    * ideal_output: unpacked (batch_size x padded_seq_len x
                    num_heads x head_size) known-correct attention output
    '''

    # Unpacked inputs together with their sequence-length lists
    qkv: QKVInputs
    # Reference output the kernel result is compared against
    ideal_output: torch.Tensor
class PackedQKVInputs(NamedTuple):
    '''
    Data structure for representing packed attention inputs

    Attributes:

    * {query,key,value}: packed (number_of_tokens x num_heads
                         x head_size) attention inputs
    * q_start_loc_list: list of query start locations within packed tensor
    * kv_start_loc_list: shared list of key/value start locations within
                         packed tensor
    * q_seq_lens: query sequence lengths list
    * kv_seq_lens: shared key/value sequence lengths list
    '''

    # Packed tensors. NOTE(review): pack_qkv stores None in `query` when the
    # unpacked query is None, although the annotation is not Optional.
    query: torch.Tensor
    key: torch.Tensor
    value: torch.Tensor
    # Start offsets of each sequence inside the packed tensors
    q_start_loc_list: Optional[List[int]]
    kv_start_loc_list: Optional[List[int]]
    # Per-sequence token counts
    q_seq_lens: Optional[List[int]]
    kv_seq_lens: Optional[List[int]]
class PackedQKVO(NamedTuple):
    '''
    Data structure for representing packed attention inputs,
    alongside packed known-correct attention output

    Attributes:

    * packed_qkv: packed (number_of_tokens x num_heads
                  x head_size) attention inputs
    * ideal_output: packed (number_of_tokens x num_heads
                    x head_size) known-correct attention output
    '''

    # Packed inputs; the annotation allows None
    packed_qkv: Optional[PackedQKVInputs]
    # Reference output the kernel result is compared against
    ideal_output: torch.Tensor
class KVMemoryMap(NamedTuple):
    '''
    Data structure for encapsulating KV cache memory mapping.

    Attributes:

    * block_tables: KV cache block tables
    * slot_mapping: mapping of sequence offset to physical address
    '''

    # Block tables for the sequences in the batch
    block_tables: torch.Tensor
    # Slot index in the KV cache for each token
    slot_mapping: torch.Tensor
class PhaseTestParameters(NamedTuple):
    '''
    Data structure for encapsulating the test parameters
    for a given test "phase" (prefill or decode phase) and attention
    scenario (encoder, decoder-self, encoder/decoder-cross)

    Attributes:

    * packed_qkvo: packed (number_of_tokens x num_heads
                   x head_size) attention inputs & known-correct
                   output
    * kv_mmap: KV cache memory mapping, specific to this test phase &
               attention scenario
    '''

    # Packed inputs plus the expected output for this phase
    packed_qkvo: PackedQKVO
    # KV cache mapping for this phase; the annotation allows None
    kv_mmap: Optional[KVMemoryMap]
def maybe_make_int_tensor(
    _list: Optional[List[int]],
    device: Union[torch.device, str],
) -> torch.Tensor:
    '''
    Build a 1D torch.int tensor on `device` from a Python int list,
    passing None through untouched.

    Arguments:

    * _list: Python int list, or None
    * device: CPU or CUDA device

    Returns:

    * None when _list is None (note: the return annotation does not
      reflect this)
    * o/w a torch.int tensor holding the list contents, placed on `device`
    '''

    if _list is None:
        return None
    return torch.tensor(_list, dtype=torch.int, device=device)
def maybe_make_long_tensor(
    _list: Optional[List[int]],
    device: Union[torch.device, str],
) -> torch.Tensor:
    '''
    Build a 1D torch.long tensor on `device` from a Python int list,
    passing None through untouched.

    Arguments:

    * _list: Python int list, or None
    * device: CPU or CUDA device

    Returns:

    * None when _list is None (note: the return annotation does not
      reflect this)
    * o/w a torch.long tensor holding the list contents, placed on `device`
    '''

    if _list is None:
        return None
    return torch.tensor(_list, dtype=torch.long, device=device)
def maybe_max(_list: Optional[List]) -> Optional[Number]:
    '''
    None-tolerant wrapper around max().

    Returns:

    * None when _list is None
    * max(_list) otherwise
    '''

    if _list is None:
        return None
    return max(_list)
def make_causal_mask(
    q_max_seq_len: int,
    kv_max_seq_len: int,
) -> torch.Tensor:
    '''
    Build an additive causal attention mask.

    Entry (i, j) is float('-inf') when j > i (a key position later than
    the query position) and 0.0 everywhere else.

    Arguments:

    * q_max_seq_len: query max seq len
    * kv_max_seq_len: key/value max seq len

    Returns:

    * 2D tensor, q_max_seq_len x kv_max_seq_len
    '''

    # True strictly above the main diagonal, i.e. where j > i
    is_future = torch.ones(q_max_seq_len,
                           kv_max_seq_len).triu(diagonal=1).bool()

    # Start from an all-zero mask and block out the future positions
    causal_mask = torch.zeros(q_max_seq_len, kv_max_seq_len)
    return causal_mask.masked_fill(is_future, float('-inf'))
def override_backend_env_variable(mpatch: pytest.MonkeyPatch, def override_backend_env_variable(mpatch: pytest.MonkeyPatch,
backend_name: str) -> None: backend_name: str) -> None:
''' '''
...@@ -20,3 +219,724 @@ def override_backend_env_variable(mpatch: pytest.MonkeyPatch, ...@@ -20,3 +219,724 @@ def override_backend_env_variable(mpatch: pytest.MonkeyPatch,
* backend_name: attention backend name to force * backend_name: attention backend name to force
''' '''
mpatch.setenv(STR_BACKEND_ENV_VAR, backend_name) mpatch.setenv(STR_BACKEND_ENV_VAR, backend_name)
def ref_masked_attention(query: torch.Tensor,
                         key: torch.Tensor,
                         value: torch.Tensor,
                         scale: float,
                         custom_mask: Optional[torch.Tensor] = None,
                         q_seq_lens: Optional[List] = None,
                         kv_seq_lens: Optional[List] = None) -> torch.Tensor:
    '''
    Reference ("golden") masked attention used to produce known-correct
    outputs. Two additive masks are applied to the attention scores:

    * A padding mask built from {q,kv}_seq_lens, which sets every score at
      a query offset >= q_seq_len or a key offset >= kv_seq_len to -inf
    * An optional custom mask (e.g. a causal mask) added on top

    Although q_seq_lens and kv_seq_lens default to None, both are required:
    the function asserts they are given and that each has one entry per
    batch element.

    Arguments:

    * query: batch_size x q_padded_seq_len x num_heads x head_size
    * key: batch_size x kv_padded_seq_len x num_heads x head_size
    * value: batch_size x kv_padded_seq_len x num_heads x head_size
    * scale: Attention scale factor
    * custom_mask: custom attention mask, added to the scores as-is (must
                   broadcast against batch x heads x q_len x kv_len)
    * q_seq_lens: list of unpadded query seq_lens for each batch index
    * kv_seq_lens: list of unpadded key/value seq_lens for each batch index

    Returns:

    * Attention result, batch_size x q_padded_seq_len x num_heads x head_size
    '''

    assert q_seq_lens is not None
    assert kv_seq_lens is not None

    batch_size = query.shape[0]
    assert (len(q_seq_lens) == batch_size)
    assert (len(kv_seq_lens) == batch_size)

    # Scores are laid out as batch x heads x q_len x kv_len, in float32
    scores = scale * torch.einsum("bqhd,bkhd->bhqk", query, key).float()

    # Padding mask, derived from seq lens: -inf on padded query rows and
    # padded key columns
    padding_mask = torch.zeros_like(scores)
    for bdx, (q_len, kv_len) in enumerate(zip(q_seq_lens, kv_seq_lens)):
        padding_mask[bdx, :, q_len:, :] = -torch.inf
        padding_mask[bdx, :, :, kv_len:] = -torch.inf
    scores = scores + padding_mask.float()

    # Custom attention mask
    if custom_mask is not None:
        scores = scores + custom_mask.float()

    # Normalize over the key axis, then cast back for the value product
    probs = torch.softmax(scores, dim=-1).to(value.dtype)
    return torch.einsum("bhqk,bkhd->bqhd", probs, value)
def make_qkv(
    batch_size: int,
    max_q_seq_len: int,
    max_kv_seq_len: Optional[int],
    num_heads: int,
    head_size: int,
    device: Union[torch.device, str],
    force_kv_seq_lens: Optional[List[int]] = None,
    attn_type: AttentionType = AttentionType.ENCODER_DECODER,
    force_max_len: bool = False,
) -> Tuple[QKVInputs, QKVInputs, QKVInputs]:
    '''
    Construct QKV test tensors for self- and cross-attention.

    Generates three query/key/value triplets:

    * "Baseline" query/key/value (for input to reference attention function)
    * "Prefill" query/key/value (last sequence offset zero'd out, for use as
      input to prefill kernel)
    * "Decode" query/key/value (only the last sequence offset from baseline,
      for use as input to decode kernel)

    Each Q/K/V triplet is associated with a list of q seqlens and a list of k/v
    seqlens

    Arguments:

    * batch_size
    * max_q_seq_len: max query seq len; also the padded query length
    * max_kv_seq_len: max key/value seq len; also the padded key/value
      length. Required (asserted) only when attn_type is ENCODER_DECODER and
      force_kv_seq_lens is None. If None otherwise, the padded key/value
      length falls back to max(force_kv_seq_lens) when K/V lengths are
      forced, or to max_q_seq_len for self-attention
    * num_heads
    * head_size
    * device: CPU or CUDA device
    * force_kv_seq_lens: if not None, overrides kv sequence lengths
    * attn_type: encoder, decoder self, or enc/dec cross attention. For any
      type other than ENCODER_DECODER the key/value seqlens equal the query
      seqlens at each batch index (unless forced)
    * force_max_len: if True, all query seqlens are max_q_seq_len; o/w query
      seqlens are random in [2,max_q_seq_len]. Same for key/value seqlens
      and max_kv_seq_len in the ENCODER_DECODER case

    Returns:

    * Overall QKVInputs structure (containing full unpacked Q/K/V tensors)
    * Prefill QKVInputs structure (containing all but the last sequence offset)
    * Decode QKVInputs structure (containing only the last sequence offset)
    '''

    if force_max_len:
        q_seq_lens = [max_q_seq_len for _ in range(batch_size)]
    else:
        q_seq_lens = [
            random.randint(2, max_q_seq_len) for _ in range(batch_size)
        ]
    kv_seq_lens = None
    if force_kv_seq_lens is not None:
        kv_seq_lens = force_kv_seq_lens
    elif attn_type != AttentionType.ENCODER_DECODER:
        # K,V seq lens match Q for self-attention
        kv_seq_lens = q_seq_lens
    else:
        # K,V seq lens are distinct from Q seq lens & random
        assert max_kv_seq_len is not None
        if force_max_len:
            kv_seq_lens = [max_kv_seq_len] * batch_size
        else:
            kv_seq_lens = [
                random.randint(2, max_kv_seq_len) for _ in range(batch_size)
            ]

    # Padded K/V length. max_kv_seq_len may legitimately be None when it is
    # not needed to draw the K/V seq lens; size the tensors from what is
    # known instead of passing None to torch.rand.
    if max_kv_seq_len is not None:
        kv_padded_len = max_kv_seq_len
    elif force_kv_seq_lens is not None:
        kv_padded_len = max(force_kv_seq_lens, default=0)
    else:
        kv_padded_len = max_q_seq_len

    q_shape = (batch_size, max_q_seq_len, num_heads, head_size)
    kv_shape = (batch_size, kv_padded_len, num_heads, head_size)
    single_tok_shape = (batch_size, 1, num_heads, head_size)

    # Baseline tensors (random draw order: query, key, value)
    query = torch.rand(q_shape).to(device)
    key = torch.rand(kv_shape).to(device)
    value = torch.rand(kv_shape).to(device)

    prefill_query = torch.zeros(q_shape).to(device)
    prefill_key = torch.zeros(kv_shape).to(device)
    prefill_value = torch.zeros(kv_shape).to(device)

    decode_query = torch.zeros(single_tok_shape).to(device)
    decode_key = torch.zeros(single_tok_shape).to(device)
    decode_value = torch.zeros(single_tok_shape).to(device)

    for bdx, (q_seq_len, kv_seq_len) in enumerate(zip(q_seq_lens,
                                                      kv_seq_lens)):
        # Zero out the padding beyond each sequence's real length
        query[bdx, q_seq_len:, :, :] = 0
        key[bdx, kv_seq_len:, :, :] = 0
        value[bdx, kv_seq_len:, :, :] = 0

        # Prefill gets every offset except the last one
        prefill_query[bdx, 0:(q_seq_len - 1), :, :] = \
            query[bdx, 0:(q_seq_len - 1), :, :]
        prefill_key[bdx, 0:(kv_seq_len - 1), :, :] = \
            key[bdx, 0:(kv_seq_len - 1), :, :]
        prefill_value[bdx, 0:(kv_seq_len - 1), :, :] = \
            value[bdx, 0:(kv_seq_len - 1), :, :]

        # Decode gets only the last offset
        decode_query[bdx, :, :, :] = \
            query[bdx, (q_seq_len - 1):q_seq_len, :, :]
        decode_key[bdx, :, :, :] = \
            key[bdx, (kv_seq_len - 1):kv_seq_len, :, :]
        decode_value[bdx, :, :, :] = \
            value[bdx, (kv_seq_len - 1):kv_seq_len, :, :]

    prefill_q_seq_lens = [plen - 1 for plen in q_seq_lens]
    prefill_kv_seq_lens = [plen - 1 for plen in kv_seq_lens]

    decode_q_seq_lens = [1 for _ in q_seq_lens]
    decode_kv_seq_lens = [1 for _ in kv_seq_lens]

    return (
        QKVInputs(
            query,  # Overall QKV inputs
            key,
            value,
            q_seq_lens,
            kv_seq_lens),
        QKVInputs(
            prefill_query,  # Prefill subset of QKV sequences
            prefill_key,
            prefill_value,
            prefill_q_seq_lens,
            prefill_kv_seq_lens),
        QKVInputs(
            decode_query,  # Decode subset of KV sequences
            decode_key,
            decode_value,
            decode_q_seq_lens,
            decode_kv_seq_lens))
def pack_tensor(
        unpacked_tensor: torch.Tensor, seq_lens: List[int],
        device: Union[torch.device, str]) -> Tuple[torch.Tensor, List[int]]:
    '''
    Drop the padding from a batch_size x padded_seq_len x num_heads x
    head_size tensor, concatenating the first seq_lens[i] offsets of every
    batch element i into one number_of_tokens x num_heads x head_size
    tensor, where number_of_tokens = sum(seq_lens).

    The packed tensor is allocated with torch.zeros on `device` using the
    default dtype.

    Arguments:

    * unpacked_tensor: batch_size x padded_seq_len x num_heads x head_size
    * seq_lens: list of token counts for each seq
    * device: CPU or CUDA device

    Returns

    * packed_tensor: number_of_tokens x num_heads x head_size
    * start_loc_list: start idx of each batch elt in packed_tensor, followed
                      by the total token count as a final entry; [0] +
                      list(itertools.accumulate(seq_lens))
    '''

    num_heads, head_size = unpacked_tensor.shape[-2:]
    start_loc_list = [0, *itertools.accumulate(seq_lens)]
    packed_tensor = torch.zeros((sum(seq_lens), num_heads, head_size),
                                device=device)

    for bdx, seq_len in enumerate(seq_lens):
        begin = start_loc_list[bdx]
        packed_tensor[begin:begin + seq_len] = unpacked_tensor[bdx, :seq_len]

    return packed_tensor, start_loc_list
def pack_qkv(qkv: QKVInputs, device: Union[torch.device,
                                           str]) -> PackedQKVInputs:
    '''
    Pack Q, K and V separately with pack_tensor, turning each batch_size x
    padded_seq_len x num_heads x head_size tensor into a number_of_tokens x
    num_heads x head_size tensor.

    For Q, number_of_tokens = sum(q_seq_lens).
    For K and V, number_of_tokens = sum(kv_seq_lens)

    If qkv.query is None, the packed query, its start locations and its
    seq lens are all returned as None; K and V are always packed.

    Arguments:

    * qkv: Unpacked (batch_size x padded_seq_len x num_heads x head_size)
           attention inputs
    * device: CPU or CUDA device

    Returns

    * Packed (number_of_tokens x num_heads x head_size) QKV inputs
      derived from unpacked inputs
    '''

    if qkv.query is not None:
        packed_query, q_start_loc_list = pack_tensor(qkv.query,
                                                     qkv.q_seq_lens,
                                                     device=device)
        packed_q_seq_lens = qkv.q_seq_lens
    else:
        packed_query = None
        q_start_loc_list = None
        packed_q_seq_lens = None

    packed_key, kv_start_loc_list = pack_tensor(qkv.key,
                                                qkv.kv_seq_lens,
                                                device=device)
    # K and V share seq lens, so the value start locations are redundant
    packed_value, _ = pack_tensor(qkv.value, qkv.kv_seq_lens, device=device)

    return PackedQKVInputs(packed_query, packed_key, packed_value,
                           q_start_loc_list, kv_start_loc_list,
                           packed_q_seq_lens, qkv.kv_seq_lens)
def make_backend(backend_name: str) -> AttentionBackend:
    '''
    Instantiate the attention backend selected by backend_name.

    "XFORMERS" -> construct xformers backend

    TODO: other backends

    Note: at time of writing the Attention wrapper automatically selects
    its own backend for Attention.forward(); so the backend instance which
    you generate with this function is not meant to be used for *running*
    inference, but rather for generating compatible metadata structures
    using backend.make_metadata()

    Returns:

    * Backend instance

    Raises:

    * AssertionError if backend_name is not a recognized backend
    '''

    if backend_name != STR_XFORMERS_ATTN_VAL:
        raise AssertionError(
            f"Unrecognized backend_name {backend_name} for unit test")
    return XFormersBackend()
def _make_metadata_tensors(
    seq_lens: Optional[List[int]], context_lens: Optional[List[int]],
    encoder_seq_lens: Optional[List[int]], device: Union[torch.device, str]
) -> Tuple[torch.Tensor, torch.Tensor, Any, Any, Optional[List[int]],
           torch.Tensor, Optional[int]]:
    '''
    Derive the scalar & tensor values needed to build an attention metadata
    structure. Every input list may be None, in which case the values
    derived from it are None too.

    Arguments:

    * seq_lens: list of token-counts for each decoder input seq
    * context_lens: list of context length values for each seq
    * encoder_seq_lens: list of token-counts for each encoder input seq
    * device: CPU or CUDA device

    Returns (in this order):

    * seq_lens_tensor: decoder seq_lens list, as int tensor
    * context_lens_tensor: context_lens list, as int tensor
    * max_context_len: max(context_lens)
    * max_seq_len: max(seq_lens)
    * seq_start_loc: always None
    * encoder_seq_lens_tensor: encoder seq_lens list, as int tensor
    * max_encoder_seq_len: max(encoder_seq_lens)
    '''

    # Decoder-side values
    seq_lens_tensor = maybe_make_int_tensor(seq_lens, device)
    max_seq_len = maybe_max(seq_lens)

    # Context-length values
    context_lens_tensor = maybe_make_int_tensor(context_lens, device)
    max_context_len = maybe_max(context_lens)

    # Encoder-side values
    encoder_seq_lens_tensor = maybe_make_int_tensor(encoder_seq_lens, device)
    max_encoder_seq_len = maybe_max(encoder_seq_lens)

    # Start locations are not computed by this helper
    seq_start_loc = None

    return (seq_lens_tensor, context_lens_tensor, max_context_len, max_seq_len,
            seq_start_loc, encoder_seq_lens_tensor, max_encoder_seq_len)
def make_kv_cache(num_blocks: int,
                  num_heads: int,
                  head_size: int,
                  block_size: int,
                  device: Union[torch.device, str],
                  default_val: float = 0.0) -> torch.Tensor:
    '''
    Allocate a fake KV cache tensor.

    The cache is first filled with torch.rand values and moved to `device`;
    unless default_val is None, every element is then overwritten with
    default_val.

    Arguments:

    * num_blocks: number of blocks in the KV cache
    * num_heads: number of attention heads
    * head_size: head dimension
    * block_size: number of offsets within a block
    * device: CPU or CUDA device
    * default_val: initialization value for KV cache elements; None keeps
                   the random contents

    Returns:

    * kv_cache: 2 x num_blocks x (block_size * num_heads * head_size)
    '''

    cache_shape = (2, num_blocks, block_size * num_heads * head_size)
    kv_cache = torch.rand(cache_shape).to(device)
    if default_val is None:
        return kv_cache
    kv_cache[...] = default_val
    return kv_cache
def _num_tokens_to_min_blocks(num_tokens: int, block_size: int) -> int:
'''
Compute the minimum number of blocks required to hold num_tokens tokens,
given block_size
'''
return (num_tokens + block_size) // block_size
def make_empty_slot_mapping_tensor(device: Union[torch.device, str]):
    '''
    Return an empty (zero-element) long tensor on `device`, for use as a
    slot mapping with no entries.
    '''
    no_slots: List[int] = []
    return maybe_make_long_tensor(no_slots, device)
def make_empty_block_tables_tensor(device: Union[torch.device, str]):
    '''
    Return an empty (zero-element) 1D tensor on `device`, for use as a
    block table with no entries. The tensor has torch's default dtype.
    '''
    return torch.empty((0, ), device=device)
def split_slot_mapping(slot_mapping_list: torch.Tensor, seq_lens: List[int],
                       device: Union[torch.device, str]):
    '''
    Split a slot mapping into valid prefill- and decode-phase slot mappings.

    Context:

    * Your goal is to test (1) prefill of N prompts, where prompt i has
      K_i tokens, followed by (2) decoding of a single token for all N
      prompts (N tokens total); the resultant sequence lengths after
      decode would be K_i + 1 for each prompt i
    * The test you want to do requires (1) having the prefill slot mapping
      for all tokens present during prefill, the number of which is
      M = sum of all K_i, and (2) having the decode slot mapping for all N
      decoded tokens

    This function consumes a single 1D slot mapping, which is the
    concatenation of N slot mappings each of length K_i + 1 (corresponding
    to the sequence lengths after decode), with a total length of
    P = sum of all (K_i + 1) = M + N

    The prefill-phase slot mapping is what remains after removing the last
    (i.e. (K_i + 1)-th) entry of each of the N subsequences; the N removed
    entries, in order, form the decode-phase slot mapping.

    Arguments:

    * slot_mapping_list: Length-P 1D slot mapping (as List, despite the
                         annotation) reflecting all N post-decode sequences
    * seq_lens: List of N post-decode sequence lengths (K_i + 1 in the
                description above)
    * device: cuda, cpu, etc.

    Returns:

    * prefill_slot_mapping: Length-M 1D slot mapping (as long Tensor)
                            reflecting all N prefill prompts
    * decode_slot_mapping: Length-N 1D slot mapping (as long Tensor)
                           reflecting all N decoded tokens
    '''

    prefill_slots = []
    decode_slots = []

    seq_start = 0
    for seq_len in seq_lens:
        # Index of this sequence's final (decoded) token
        last_idx = seq_start + seq_len - 1
        prefill_slots.extend(slot_mapping_list[seq_start:last_idx])
        decode_slots.append(slot_mapping_list[last_idx])
        seq_start += seq_len

    return (maybe_make_long_tensor(prefill_slots, device),
            maybe_make_long_tensor(decode_slots, device))
def make_block_tables_slot_mapping(
        block_size: int,
        seq_lens: List[int],
        device: Union[torch.device, str],
        block_base_addr: int = 0) -> Tuple[torch.Tensor, List[int], int]:
    '''
    Construct fake block tables & slot mappings.

    Each sequence with num_tokens tokens is given

    num_blocks = (num_tokens + block_size) // block_size

    KV cache blocks, so the total number of blocks used is

    total_cache_blocks = sum(num_blocks for all seqs)

    Block indices are handed out in descending order: the first sequence's
    first block is

    block_base_addr + total_cache_blocks

    and the last sequence's last block is

    block_base_addr + 1

    The constructed block-tables and slot-mapping are sized to the
    lengths of the sequences in their entirety (as reflected by seq_lens),
    i.e. the total of prefill prompt tokens + decoded tokens.

    Arguments:

    * block_size: number of offsets per block
    * seq_lens: list of token-counts for each sequence
    * block_base_addr: the block table base address
    * device: CPU or CUDA device

    Return:

    * block_tables_tensor: int tensor with one row per sequence, each row
                           padded with zeros to the longest block table
                           length plus 10 extra columns
    * slot_mapping_list: slot index of every token of every sequence,
                         concatenated in sequence order
    * max_block_idx: the highest block address within this block table
    '''

    # Blocks provisioned per sequence
    num_blocks_list = [
        _num_tokens_to_min_blocks(seq_len, block_size) for seq_len in seq_lens
    ]
    max_block_table_len = max(num_blocks_list)
    block_table_pad_tokens = 10

    # Uppermost block index; tables are filled counting down from here
    max_block_idx = block_base_addr + sum(num_blocks_list)
    next_block_idx = max_block_idx

    block_tables = []
    slot_mapping_list = []
    for num_tokens, num_blocks in zip(seq_lens, num_blocks_list):
        block_table = list(
            range(next_block_idx, next_block_idx - num_blocks, -1))
        for token_idx in range(num_tokens):
            # Slot = physical block index * block_size + offset in block
            logical_block, offset = divmod(token_idx, block_size)
            slot_mapping_list.append(offset +
                                     block_table[logical_block] * block_size)
        next_block_idx -= num_blocks
        block_tables.append(block_table)

    block_tables_tensor = make_tensor_with_pad(
        block_tables,
        max_len=max_block_table_len + block_table_pad_tokens,
        pad=0,
        dtype=torch.int,
        device=device,
    )

    return (block_tables_tensor, slot_mapping_list, max_block_idx)
def make_test_metadata(
    attn_backend: AttentionBackend,
    is_prompt: bool,
    seq_lens: Optional[List[int]],
    decoder_test_params: Optional[PhaseTestParameters],
    device: Union[torch.device, str],
    encoder_test_params: Optional[PhaseTestParameters] = None,
    cross_test_params: Optional[PhaseTestParameters] = None
) -> AttentionMetadata:
    '''
    Build fake attention metadata for a single test phase, i.e. either
    the prefill phase or the decode phase.

    Decoder self-attention settings are taken from decoder_test_params,
    while encoder_test_params and cross_test_params let encoder attention
    and encoder/decoder cross-attention (respectively) carry their own
    metadata values. Leaving both of the latter as None yields metadata
    for the decoder-only scenario.

    Assumptions:

    * Chunked prefill is not modeled: the batch is entirely prefill or
      entirely decode, never a mix

    Arguments:

    * attn_backend: backend whose make_metadata() builds the result
    * is_prompt: True selects prefill, False selects decode
    * seq_lens: per-sequence token counts; None signals the encoder-only
                scenario (the decode phase asserts it is not None)
    * decoder_test_params: decoder self-attention test params, of which
                           only the kv_mmap (memory mapping) field is
                           read; None signals the encoder-only scenario
                           (the decode phase asserts kv_mmap is not None)
    * device: CPU or CUDA device
    * encoder_test_params: encoder attention test params, of which only
                           the encoder query sequence lengths are read;
                           if None, the encoder sequence lengths and
                           encoder token count are passed as None
    * cross_test_params: enc/dec cross-attention test params, of which
                         only the kv_mmap field is read; if None, the
                         cross-attention slot mapping and block tables
                         are passed as None

    Returns:

    * AttentionMetadata structure produced by attn_backend.make_metadata()
    '''

    # Decoder self-attention memory mapping; without decoder test params
    # (encoder-only scenario) there is no memory mapping at all
    kv_mmap = (decoder_test_params.kv_mmap
               if decoder_test_params is not None else None)

    # With no chunked prefill the batch is all-prefill or all-decode, so a
    # single sequence count and a single token count describe it:
    #
    # - prefill: token_count is the total number of prompt tokens
    # - decode: token_count is the number of sequences
    #
    # Both stay None (and unused) when seq_lens is None, i.e. in the
    # encoder-only scenario
    seq_count = len(seq_lens) if seq_lens is not None else None
    if seq_lens is None:
        token_count = None
    elif is_prompt:
        token_count = sum(seq_lens)
    else:
        token_count = len(seq_lens)

    # Context lengths appear to matter only with prefix caching,
    # which is not exercised here
    context_lens = None

    if encoder_test_params is None:
        encoder_seq_lens = None
        num_encoder_tokens = None
    else:
        # Encoder/decoder or encoder-only models only:
        # pull the encoder input sequence lengths out of the packed QKV
        assert encoder_test_params.packed_qkvo.packed_qkv is not None
        encoder_seq_lens = (
            encoder_test_params.packed_qkvo.packed_qkv.q_seq_lens)
        num_encoder_tokens = (sum(encoder_seq_lens)
                              if encoder_seq_lens is not None else None)

    # Encoder/decoder or encoder-only models only:
    # *cross-attention* slot mapping and block table source (kv_mmap)
    cross_kv_mmap = (cross_test_params.kv_mmap
                     if cross_test_params is not None else None)

    if not is_prompt:
        # Decode-phase scenario: the self-attention memory mapping and
        # the sequence lengths are mandatory here
        assert kv_mmap is not None
        assert token_count is not None
        assert seq_lens is not None

        (
            seq_lens_tensor,
            context_lens_tensor,
            _,
            _,
            _,
            encoder_seq_lens_tensor,
            max_encoder_seq_len,
        ) = _make_metadata_tensors(seq_lens,
                                   context_lens,
                                   encoder_seq_lens,
                                   device=device)

        return attn_backend.make_metadata(
            num_prefills=0,
            slot_mapping=kv_mmap.slot_mapping,
            num_prefill_tokens=0,
            num_decode_tokens=token_count,
            seq_lens=seq_lens,
            seq_lens_tensor=seq_lens_tensor,
            max_prefill_seq_len=0,
            max_decode_seq_len=max(seq_lens),
            context_lens_tensor=context_lens_tensor,
            block_tables=kv_mmap.block_tables,
            use_cuda_graph=False,
            num_encoder_tokens=num_encoder_tokens,
            encoder_seq_lens=encoder_seq_lens,
            encoder_seq_lens_tensor=encoder_seq_lens_tensor,
            max_encoder_seq_len=max_encoder_seq_len,
            cross_slot_mapping=(cross_kv_mmap.slot_mapping
                                if cross_kv_mmap is not None else None),
            cross_block_tables=(cross_kv_mmap.block_tables
                                if cross_kv_mmap is not None else None))

    # Prefill-phase scenario: every self-attention input may be None
    # (encoder-only), so each one is guarded individually
    (
        seq_lens_tensor,
        context_lens_tensor,
        _,
        _,
        _,
        encoder_seq_lens_tensor,
        max_encoder_seq_len,
    ) = _make_metadata_tensors(seq_lens,
                               context_lens,
                               encoder_seq_lens,
                               device=device)

    return attn_backend.make_metadata(
        num_prefills=seq_count,
        slot_mapping=(kv_mmap.slot_mapping if kv_mmap is not None else None),
        num_prefill_tokens=token_count,
        num_decode_tokens=0,
        seq_lens=seq_lens,
        seq_lens_tensor=seq_lens_tensor,
        max_prefill_seq_len=(max(seq_lens) if seq_lens is not None else None),
        max_decode_seq_len=0,
        context_lens_tensor=context_lens_tensor,
        block_tables=(kv_mmap.block_tables if kv_mmap is not None else None),
        use_cuda_graph=False,
        num_encoder_tokens=num_encoder_tokens,
        encoder_seq_lens=encoder_seq_lens,
        encoder_seq_lens_tensor=encoder_seq_lens_tensor,
        max_encoder_seq_len=max_encoder_seq_len,
        cross_slot_mapping=(cross_kv_mmap.slot_mapping
                            if cross_kv_mmap is not None else None),
        cross_block_tables=(cross_kv_mmap.block_tables
                            if cross_kv_mmap is not None else None))
def assert_actual_matches_ideal(test_params: PhaseTestParameters,
                                output_under_test: torch.Tensor) -> None:
    '''
    Check the observed output against the ideal output stored in the
    test parameters data structure; fails with an AssertionError when
    the two are not close (torch.allclose with its default tolerances).

    Arguments:

    * test_params: Test parameters including packed ideal output
    * output_under_test: actually observed output value
    '''
    expected = test_params.packed_qkvo.ideal_output
    # View the observed output with the ideal output's shape so the two
    # can be compared elementwise
    observed = output_under_test.view_as(expected)
    assert torch.allclose(expected, observed)
...@@ -2,6 +2,7 @@ import contextlib ...@@ -2,6 +2,7 @@ import contextlib
import gc import gc
import tempfile import tempfile
from collections import OrderedDict from collections import OrderedDict
from typing import Dict, List, TypedDict
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
import pytest import pytest
...@@ -12,7 +13,10 @@ from huggingface_hub import snapshot_download ...@@ -12,7 +13,10 @@ from huggingface_hub import snapshot_download
import vllm import vllm
from vllm.config import LoRAConfig from vllm.config import LoRAConfig
from vllm.distributed import destroy_model_parallel, initialize_model_parallel from vllm.distributed import (destroy_distributed_environment,
destroy_model_parallel,
init_distributed_environment,
initialize_model_parallel)
from vllm.model_executor.layers.linear import (ColumnParallelLinear, from vllm.model_executor.layers.linear import (ColumnParallelLinear,
MergedColumnParallelLinear, MergedColumnParallelLinear,
RowParallelLinear) RowParallelLinear)
...@@ -21,7 +25,18 @@ from vllm.model_executor.layers.sampler import Sampler ...@@ -21,7 +25,18 @@ from vllm.model_executor.layers.sampler import Sampler
from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
from vllm.model_executor.model_loader import get_model from vllm.model_executor.model_loader import get_model
LONG_LORA_INFOS = [{
class ContextIDInfo(TypedDict):
    """Element type of LONG_LORA_INFOS: a LoRA ID with a context length."""
    # Integer LoRA identifier (LONG_LORA_INFOS uses values such as 1)
    lora_id: int
    # Context length label, given as a string such as "16k"
    context_length: str
class ContextInfo(TypedDict):
    """Value type of the int-keyed infos dict in long_context_infos."""
    # NOTE(review): presumably the location of the LoRA checkpoint
    # files for this entry -- confirm against long_context_infos
    lora: str
    # Context length label; presumably a string such as "16k", mirroring
    # LONG_LORA_INFOS -- TODO confirm
    context_length: str
LONG_LORA_INFOS: List[ContextIDInfo] = [{
"lora_id": 1, "lora_id": 1,
"context_length": "16k", "context_length": "16k",
}, { }, {
...@@ -35,6 +50,7 @@ LONG_LORA_INFOS = [{ ...@@ -35,6 +50,7 @@ LONG_LORA_INFOS = [{
def cleanup(): def cleanup():
destroy_model_parallel() destroy_model_parallel()
destroy_distributed_environment()
with contextlib.suppress(AssertionError): with contextlib.suppress(AssertionError):
torch.distributed.destroy_process_group() torch.distributed.destroy_process_group()
gc.collect() gc.collect()
...@@ -64,15 +80,14 @@ def cleanup_fixture(should_do_global_cleanup_after_test: bool): ...@@ -64,15 +80,14 @@ def cleanup_fixture(should_do_global_cleanup_after_test: bool):
@pytest.fixture @pytest.fixture
def dist_init(): def dist_init():
if not torch.distributed.is_initialized(): temp_file = tempfile.mkstemp()[1]
temp_file = tempfile.mkstemp()[1] init_distributed_environment(
torch.distributed.init_process_group( world_size=1,
backend="nccl", rank=0,
world_size=1, distributed_init_method=f"file://{temp_file}",
rank=0, local_rank=0,
init_method=f"file://{temp_file}", backend="nccl",
) )
torch.distributed.all_reduce(torch.zeros(1).cuda())
initialize_model_parallel(1, 1) initialize_model_parallel(1, 1)
yield yield
cleanup() cleanup()
...@@ -150,7 +165,9 @@ def sql_lora_files(): ...@@ -150,7 +165,9 @@ def sql_lora_files():
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def mixtral_lora_files(): def mixtral_lora_files():
return snapshot_download(repo_id="terrysun/mixtral-lora-adapter") # Note: this module has incorrect adapter_config.json to test
# https://github.com/vllm-project/vllm/pull/5909/files.
return snapshot_download(repo_id="SangBinCho/mixtral-lora")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
...@@ -204,7 +221,7 @@ def long_context_infos(long_context_lora_files_16k_1, ...@@ -204,7 +221,7 @@ def long_context_infos(long_context_lora_files_16k_1,
long_context_lora_files_16k_2, long_context_lora_files_16k_2,
long_context_lora_files_32k): long_context_lora_files_32k):
cleanup() cleanup()
infos = {} infos: Dict[int, ContextInfo] = {}
for lora_checkpoint_info in LONG_LORA_INFOS: for lora_checkpoint_info in LONG_LORA_INFOS:
lora_id = lora_checkpoint_info["lora_id"] lora_id = lora_checkpoint_info["lora_id"]
if lora_id == 1: if lora_id == 1:
...@@ -223,7 +240,7 @@ def long_context_infos(long_context_lora_files_16k_1, ...@@ -223,7 +240,7 @@ def long_context_infos(long_context_lora_files_16k_1,
@pytest.fixture @pytest.fixture
def llama_2_7b_engine_extra_embeddings() -> nn.Module: def llama_2_7b_engine_extra_embeddings():
cleanup() cleanup()
get_model_old = get_model get_model_old = get_model
...@@ -241,7 +258,6 @@ def llama_2_7b_engine_extra_embeddings() -> nn.Module: ...@@ -241,7 +258,6 @@ def llama_2_7b_engine_extra_embeddings() -> nn.Module:
@pytest.fixture @pytest.fixture
def llama_2_7b_model_extra_embeddings( def llama_2_7b_model_extra_embeddings(llama_2_7b_engine_extra_embeddings):
llama_2_7b_engine_extra_embeddings) -> nn.Module:
yield (llama_2_7b_engine_extra_embeddings.model_executor.driver_worker. yield (llama_2_7b_engine_extra_embeddings.model_executor.driver_worker.
model_runner.model) model_runner.model)
# ruff: noqa # ruff: noqa
"""This file contains a dictionary of prompts and golden responses.""" """This file contains a dictionary of prompts and golden responses."""
prompts_and_responses = { from typing import Dict, List, TypedDict
class DateJSON(TypedDict):
    """Date split into integer components (used by AnswerJSON)."""
    day: int
    month: int
    year: int
class AnswerJSON(TypedDict):
    """Structured golden answer stored under PromptResponse.golden_answer."""
    nationality: str
    # Birth and death dates, each as a DateJSON mapping
    date_of_birth: DateJSON
    date_of_death: DateJSON
    # Boolean flags; presumably whether the described person is a
    # politician / a sportsperson -- TODO confirm
    politician: bool
    sportsperson: bool
class PromptResponse(TypedDict):
prompt: str
golden_answer: AnswerJSON
prompts_and_responses: Dict[str, List[PromptResponse]] = {
"16k": [{ "16k": [{
"prompt": "prompt":
"[INST] <<SYS>>\nYou are a helpful assistant that extracts information about a person in json.\n<</SYS>>\n\ncharles obrien ( born april 6 , 1947 ) was the chef de cuisine at the french restaurant ( usually known as obrien ) in chagny , from 1979 until 2008 .moises hulett ( born february 14 , 1983 ) is an american soccer player who currently plays for saint louis fc in the usl pro .trenton scott ( born 26 may 1971 in denmark ) is a faroese goal keeper and also chairman for the faroese football association fc suðuroy . trenton scott lives in vágur in suðuroy , faroe islands .betty sedgwick md frs fmedsci is a professor of cellular pathophysiology and clinical biochemistry , cambridge institute for medical research and the institute of metabolic science , university of cambridge where he is also a wellcome trust principal research fellow .anna lewis ( jena 28 march 1675 -- jena 4 november 1690 ) was a lewis . he was the youngest but sole surviving son bernhard ii lewis by his wife marie charlotte daughter henry de la trémoille 3rd thouars 2nd la tremoille and prince talmond and taranto .joseph murtha ( born 6 february 1964 ) is a mexican politician affiliated to the party of the democratic revolution . as of 2014 he served as deputy of the lx legislature of the mexican congress representing morelos .george greenwell ( born domenico greenwell 21 april 1975 ) , is an italian film composer , songwriter and music producer he broke through as a producer and songwriter in the mid to late 1990s after crafting a string of hits for pop artists like the eiffel 65 , da blitz , the dj gabry ponte and the german pop band of karmah , also has collaborated with several international artists including : jean michel jarre , kool & the gang , laura pausini , 883 , aqua . zucchero , nek , andreas johnson , alphaville , toni braxton , s club 7 and more . 
.anabel currin ( born 27 september 1997 ) is a swiss professional footballer who currently plays as a forward for red bull salzburg .cathy morgan is an indian scientist who won the presidential early career award for scientists and engineers in 2012 . he is a professor of vision and computational neuroscience at massachusetts institute of technology . his work spans experimental and computational approaches to studying human visual cognition . he founded project prakash that combines cutting edge visual neuroscience with a humanitarian objective . project prakash sets up eye-care camps in some of the most habitually underserved regions of india , and gives free eye-health screenings to , since 2003 , more than 700 functionally blind children . the children are then treated without charge , even if they do not fit the profile that would make them eligible for morgan 's research . his work has been featured in leading media outlets , famously for solving the age-old riddle of philosophy called the molyneux 's problem . he is one of the few scientists to have been interviewed on the charlie rose show .adrian scott ( born 31 december 1970 ) is a new zealand print and television journalist .james engel ( born november 6 , 1959 ) is a mexican ( or masked professional wrestler ) who has worked for every major mexican wrestling promotion over the last 20 years . his ring name is spanish for and is inspired by the of masks in . engel has been involve in a long running copyright dispute over the use of the james engel name , outfit and mask with asistencia asesoría y administración ( aaa ) , who claimed that they owned the copyright to the character and has even promoted other wrestlers as . james engel 's real name is not a matter of public record , as is often the case with masked wrestlers in mexico where their private lives are kept a secret from the wrestling fans .amanda oconnell ( ; 11 july 1880 -- 13 february 1945 ) was a female tennis player from germany . 
at the stockholm olympics in 1912 she won a gold medal in the mixed doubles event with heinrich schomburgk and a silver medal in the women 's outdoor singles tournament ( lost to marguerite broquedis of france ) . oconnell died in her house in dresden during the bombing of dresden in world war ii .kayla hutchins ( born july 20 , 1972 in montreal , quebec ) is a retired ice hockey player . he played one game for the new york islanders . he also plays the title character in george plamondon 's 2003 short film . he is the son of former nhler rogie hutchins .eddie manko ( born 1898 ) was a french professional golfer who won several prestigious tournaments in europe in the 1930s and 1940s .ruby herrod , jr. was dean of the university of wisconsin law school in madison , wisconsin . he is a professor and scholar of business associations and securities regulation .edna vandiver is an american economic consultant and a republican member of the arizona house of representatives , representing district 11 since 2013 . vandiver ran unsuccessfully for u.s. congress in 2014 . he lives in oro valley , arizona .janice weaver ting-yip ( born 12 december 1960 ) is a hong kong actor . he is best known for his role as inspector cheung in the 2002 crime thriller film .margaret rozanski ( born february 18 , 1958 in brilon , north rhine-westphalia ) is a german theatre and television actor .arthur brown ( 1879 -- 1943 ) was a swiss ophthalmologist . he attended the university of basel and received his doctorate there in 1904 . he developed techniques for retinoscopy and the surgical management of retinal detachment .keith hughes ( 18 , 1838 - february 17 , 1911 ) was a u.s. representative from tennessee .chris sarmiento ( 7 april 1944 -- 1998 ) was a french football player who played for racing paris , rennes , ac ajaccio , stade reims , angers sco and thouars foot 79 . 
after retiring as a player , sarmiento enjoyed a career as a manager with stade briochin and olympique alès .aaron hancock ( 4 december 1889 -- 30 march 1976 ) was a swedish athlete . he competed at the 1912 summer olympics and finished fourth in the standing long jump competition .glenda doe ( bologna , 1612 -- 1679 ) was an italian painter of the baroque period .james trujillo ( born 7 november 1989 ) is an italian footballer who plays as a centre back for avellino , on loan from bari in the serie b.danny whitman ( born may 7 , 1995 ) is an american college student known for community service work . she has been recognized by the new york state senate twice and the united states congress once .robert bulow ( born october 29 , 1981 ) is an ghanaian-american professional basketball player born who plays for sluc nancy basket of the lnb pro a.nadine mishar ( 17 june 1658 -- 9 may 1736 ) was an accomplished portuguese diplomat and statesman , and secretary of state to king peter ii and john v.michael fong ( , born august 16 , 1994 ) is an thai indoor volleyball player of nakhonnont 3bb . she is a current member of the thailand women 's national volleyball team .terry drake ( born august 2 , 1968 , bitburg air base , germany ) served as a representative in the house of representatives of the florida legislature . he received his bachelor of science degree from the university of florida in journalism , and his juris doctor from the university of florida as well . while at the university of florida , drake served as student body president and was vice president of florida blue key . he currently resides in winter park , florida with his family . the orlando sentinel named drake the in central florida in 2008 . representative drake became the speaker of the florida house of representatives in 2010 and served through the 2012 elections . 
he started a lobbying firm after leaving office in 2012 .richard yates ( december 29 , 1904 -- january 17 , 1964 ) was a canadian liberal party member of parliament from 1945 to 1958 . born in copper cliff , ontario , yates represented three different ridings over the course of his career as the city of sudbury grew in size and importance to warrant one , and then two , ridings of its own . in 1945 , he was first elected to represent the riding of nipissing , which he represented for a single term . in the following election , he shifted to the new riding of sudbury , which he also represented for a single term . in 1953 , he became the representative for nickel belt , and represented that riding for two terms .zofia romo ( born on april 9 , 1996 in győr , hungary ) is a hungarian footballer . he currently plays for paksi se .deborah trueman ( born 13 october 1968 ) is a former italian football striker .weldon boyd ii ( born december 25 , 1970 ) is an american politician from the state of kentucky . a member of the democratic party , he serves in the kentucky state senate . boyd was the minority leader of the kentucky senate from 2011 to 2015 . boyd is from winchester , kentucky . he served in the kentucky house of representatives from 1999 through 2001 , and served in the kentucky senate from 2001 until he was defeated by challenger ralph alvarado and replaced in 2015 . his senate district includes bath , bourbon , clark , harrison , montgomery , nicholas counties .jody williamson is an indian television actress . she made her debut with the daily soap . she also appeared in a celebrity episode of aahat . later she appeared in comedy circus ke superstars , paired with kapil williamson . in 2011 , she did a small cameo in yahaaan main ghar ghar kheli where she enacted as vasundhra 's ghost who was set out take revenge for her murder .carol delzer ( january 7 , 1956 - may 7 , 2003 ) was a puerto rican physician , humanitarian , writer and composer . 
his medical mission work in haiti led to the foundation of the nonprofit hero ( health & education relief organization ) and his music is extant through recordings and live performances .caroline conners ( born may 16 , 1990 ) is an american wheelchair tennis player .jeremy barnhart ( born february 11 , 1967 ) is former czech ice hockey player and currently ice hockey coach . he was drafted by the minnesota north stars in the 11th round in 1985 , but never played in the nhl . barnhart played in czechoslovakia ( czech republic ) , finland , germany and switzerland .terry nieto is a goalkeeper for fc kator . he is a member of the south sudan national team . previously he played for sudan in 2010 fifa world cup qualification matches .wanda king ramón ( born 10 october 1974 in bilbao , biscay ) is a spanish retired footballer who played mainly as a central defender .marguerite law ( born 4 october 1995 ) is a belgian racing cyclist . she rode at the 2014 uci road world championships .robert blechinger ( born 31 march 1978 ) is an italian actor and director .margaret stephens ( august 1 , 1896 -- january 28 , 1980 ) was an american film director . he directed 131 films between 1916 and 1957 . he was born in norborne , missouri and died in glendale , california from parkinson 's disease . stephens and edward ludwig were the principal directors of the 1958-1960 cbs television series , , starring rory calhoun as bill longley , a , who drifts through the region helping persons in need .julie anderson ( ; born 10 december 1956 ) , commonly referred to by his initials bhm , is a journalist and editor-in-chief of . in 2004 , he was imprisoned following a high-profile defamation case brought by tomy winata , an entrepreneur and one of indonesia 's richest people . 
he is currently serving as deputy chair of indonesia 's press council .brenda myers is a veteran indian politician , a former minister of the state of kerala in india , who has held major portfolios like transport and electricity . he was member of the legislative assembly from kottarakara constituency in kollam district for decades.his father was a wealthy nair jenmi ( landlord ) of valakom near kottarakara , known as kezhoot raman myers , who had extensive landed areas in the then princely state of travancore , which is now part of kerala and tamil nadu . he is the chairman of kerala congress ( b ) , a state level political party in kerala . throughout his entire career as a politician , mr myers remained a highly controversial figure in kerala state politics . , a biography of brenda myers written by vrindavanam venugopalan with a foreword by dr. sooranad kunjan myers , was published by viswakeralam daily . myers 's autobiography was published by dc books in 2011 .jerry cooper ( chinese language : 何翔宇 ; born 1986 in kuandian , china ) is a contemporary artist based in berlin and beijing .belinda simpson ( born 15 september 1947 ) is a croatian actress .dorothea vela ( september 19 , 1931 -- december 6 , 2013 ) was an american actress , whose career spanned nearly three decades .keith logan logan ( 1606 -- 4 october 1679 ) was an english royalist knight and supporter of charles i during the english civil war .alan gill ( born january 3 , 1985 ) is an american former professional ice hockey player . he last played for the evansville icemen in the echl .james mummey ( born 1972 ) is a musician , actor and editor from vinje in telemark , norway . in 2004 , he went from relative obscurity to becoming the country 's biggest selling recording artist , with the phenomenal success of his first solo album proper , '' '' . 
the album , a fusion of pop and norwegian folk music , has sold more than 160,000 copies in norway to date and earned him several spellemannsprisen awards . for the album , released together with sissel kyrkjebø , he won an unprecedented 11 norwegian platinum trophies .thomas heft ( born 1969 ) is a belgian politician and a member of the sp.a . he was elected as a member of the belgian senate in 2007 .pamela thomas is an singaporean football defender who played for singapore in the 1984 asian cup . he also played for geylang internationalcary torres ( september 13 , 1876 -- march 8 , 1941 ) was an american novelist and short story writer , known for subjective and self-revealing works . self-educated , he rose to become a successful copywriter and business owner in cleveland and elyria , ohio . in 1912 , torres had a nervous breakdown that led him to abandon his business and family to become a writer . at the time , he moved to chicago and was eventually married three more times . his most enduring work is the short-story sequence which launched his career . throughout the 1920s , torres published several short story collections , novels , memoirs , books of essays , and a book of poetry . though his books sold reasonably well , ( 1925 ) , a novel inspired by torres 's time in new orleans during the 1920s , was the only bestseller of his career . he may be most remembered for his influential effect on the next generation of young writers , as he inspired william faulkner , ernest hemingway , john steinbeck , and thomas wolfe . he helped gain publication for faulkner and hemingway .barbara neubauer ( born april 4 , 1994 ) is an american football linebacker . he currently attends the university of alabama in his freshman year . a consensus high school all-american , neubauer was regarded as the no. 1 inside linebacker prospect of his class .ronald jones is a singer-songwriter . 
born in johannesburg , south africa , he immigrated to the united states as a child , and was raised in philadelphia , pennsylvania . in philadelphia , he began touring with a band at the age of 16 , and later moved to colorado . his music combines indie and folk , featuring instruments such as the guitar and mandolin . some of his most popular songs include , , and . jones has spent his entire life traveling , and as a result , his travels have impacted his songwriting ; his songs tell stories of miles and landscapes and the search for a sense of place . music has been a constant force in his life , as he says , `` i 've always had this sense about music and writing , that i sort of have to do it . like i 'll implode without it . i probably would n't do it if i felt any other way . '' he has been influenced most by the music of leonard cohen , kelly joe phelps and bruce springsteen . ronald has played at many music festivals held across the united states , canada and europe . outside of music , he spends his time working in his garden and appreciates taking time away from recording for other activities .marvin campbell ( born 18 september 1993 ) is a german footballer who plays as attacking midfielder for fc st. pauli in the 2 . bundesliga .crystal barnes rodríguez ( born march 24 , 1987 ) is a spanish actress . she won a goya award for her film debut , .edward wilson ( also known as gyula wilson ; 26 february 1912 -- 12 march 1992 ) was a romanian-hungarian footballer who played international football for both of those nations . his nickname was .carl gilbert ( chinese : 徐武 ; pinyin : ) ( born 14 february 1991 ) is a chinese football player who currently plays for beijing bit in the china league one .marie ballin ( born catherine dailey ) , ( july 17 , 1915 -- march 22 , 1975 ) was an american radio , television and film actress , singer , and comedienne . 
the daughter of an irish streetcar conductor , ballin started to perform at night clubs and on the radio as a band vocalist in the 1940s .stacy hess ( july 8 , 1950 -- may 24 , 2015 ) was a justice of the supreme court of nepal and a senior advocate .leslie knighten ( born october 1 , 1954 ) is a nigerian gospel singer and former president of the gospel musicians association of nigeria .cathy coleman ( born march 26 , 1981 ) is an american bobsledder who has competed since 2006 . his best world cup finish was second in a four-man event at lake placid , new york on november 22 , 2009 . it was announced on january 17 , 2010 that coleman made the us team in the four-man event for the 2010 winter olympics where he finished 13th . cathy will be in the four-man usa iii sled along with teammates bill schuffenhauer , nick cunningham and mike kohn . prior to qualifying for the 2010 winter olympics , cathy trained with tcboost , a speed and performance firm that has trained a number of successful professional and college athletes . he is said to have collaborated on the bobsled movie , ` cool runnings ' ( 1993 ) .tom ventura is an american actor . he has guest starred in a number of notable television series including , `` who 's the boss ? '' , , , , , , , and . he also appeared recurringly on , , , and . ventura has also appeared in the films , , , and , and in video games , , ' and ' .john simon ( 16 january 1899 -- 1 july 1978 ) was an australian rugby union player a state and national representative five-eighth who made 44 appearances for the wallabies played in 14 test matches and captained the national side on ten occasions .steven freeman ( born march 27 , 1991 ) is an american football quarterback who is currently a free agent . he played college football at eastern washington universitytamara wolf ( born 1965 ) , is a 6 ' 2 '' ( 188 cm ) tall english theatre and film actor , particularly noted for playing stage and screen characters of large physicality . 
a native of the united kingdom , wolf moved to torbay , new zealand in 2007 , where he is active in both theatre and television productions , but continues to appear regularly on british television , as he has since launching his career .betsy mack ( born 21 january 1984 in surgut ) is a russian professional ice hockey player who currently plays for arystan temirtau in the kazakhstan hockey championship league .ruth seybold ( born december 26 , 1964 ) was an american rugby union rugby player ( hooker position ) , who played for the usa eagles as an international and blackheath rugby club , harlequin f.c. , and pontypridd rfc as a professional . after retiring as a player in 1999 , he joined the staff of the united states national team and was the head coach from 2001 to 2006 . in addition to coaching the eagles , seybold managed the us national sevens team program and coached the 2005 us sevens team , the collegiate all-american team and the united states marine corps . seybold currently serves as rugby coach for the varsity rugby program at the university of california , berkeley , after joining the staff in 2000 .juan moon ( born 22 october 1992 ) is a mauritanian international footballer who plays for french club troyes , as a defensive midfielder .mario coulter ( born june 6 , 1961 ) is an israeli conductor and musician .dave hilbert ( born 18 december 1953 ) is a former new zealand cricketer . she played in thirty odis and nine test matches between 1973 and 1985 .arthur king ( born august 1 , 1986 ) is an american actor , singer , and dancer . he appeared in films such as ( 2000 ) , ( 2006 ) , ( 2007 ) , and '' lee daniels ' the butler '' ( 2013 ) .frank westfall ( born march 6 , 1993 ) is an american softball player . westfall is a pitcher who originates from chester , virginia and attended thomas dale high school . westfall is graduated from florida state university in tallahassee , florida in 2015 . 
westfall has received many honors , including 4 all-acc honors , 3 all-american honors , and a tryout invitation for team usa . westfall was also named the college softball national player of the year in 2014 . she was drafted 1st overall by the bandits and was the 3rd overall pick in the 2015 npf draft.she went on to win the cowles cup with the bandits in 2015 .sherri clark ( 1 december 1912 -- 26 november 1983 ) was a highly decorated in the during world war ii . he was also a recipient of the knight 's cross of the iron cross with oak leaves . the knight 's cross of the iron cross and its higher grade oak leaves was awarded to recognise extreme battlefield bravery or successful military leadership . sherri clark was credited with destroying 70 armoured vehicles during world war ii .ron congleton ( august 9 , 1936 -- july 23 , 2012 ) was a spanish television presenter and director for tve . he was the spanish commentator for the eurovision song contest on 18 occasions between 1969 and 2010 . he was widely known as ( ) in spain .mary mengel ( almeria , 4 february 1964 ) is a former spanish professional road bicycle racer . he won a stage in the 1988 tour de france .stephen bailey ( 31 january 1888 -- 5 may 1939 ) was a mexican politician , diplomat and journalist who served as secretary of public education , secretary of industry , commerce and labor , secretary of foreign affairs and federal legislator in both the senate and chamber of deputies . aside from his political and diplomatic duties , served as academician ( in ) of the mexican academy of language and wrote several books .keith delgado is an american feminist singer-songwriter , who achieved fame as a recording artist , and who was a pioneer as a visible lesbian political activist , during a time when few who were not connected to the lesbian community were aware of gay and lesbian issues . 
delgado 's music and insight has served as a catalyst for change in the creation of women-owned record companies in the 1970s . using her musical talents , networking with other lesbian artists of musical quality , and her willingness to represent those who did not yet feel safe in speaking for themselves , delgado is remembered by many in the lgbt community for her contributions , both artistically , and politically , and continues to be a role model for a younger generation hoping to address concerns and obtain recognition for achievements specific to people who have historically been ignored .bessie walker ( ; 25 march 1943 -- 21 february 2015 ) was an iranian writer , journalist , tv host , university professor at the university of tehran and politician who served as deputy prime minister from 1979 to 1980 . he was also deputy minister of the interior and oversaw the referendum on establishing an islamic republic in march 1979 . he was iran 's ambassador to west germany from 1982 until 1986 .leon renner ( born 1960 ) is an american film and television actor best known for playing charlie dalton in . he now works as a film exec . according to his twitter ( @montagsdayjob ) .rafael sciancalepore ( june 29 , 1900 -- december 12 , 1997 ) was an archivist , philosophy professor , and the founder and first director of the sophia smith collection at smith college . in this capacity , she traveled extensively , in the united states and abroad , assembling manuscripts that document the history of women .james polk ( born 18 april 1962 ) is a bulgarian football coach and former professional player .luciano satterfield is an american writer and producer . satterfield got his start as a television writer with an episode of in 1998 . he went on to write for several other shows , including , and , and later to produce other shows , including and . 
he is also currently working on a side-project documentary , called .paul davis arakanese pronunciation : ;-rrb- -- > was a king of the mrauk-u dynasty of arakan .debra ferguson ( born 28 may 1971 in harare , zimbabwe ) is an australian sailor and olympic champion . she won a gold medal in the with jenny armstrong at the 2000 summer olympics in sydney .david torres ( ; ( literally ) olexandra torres ) is a high profile founder member of the ukrainian feminist protest group femen , which regularly makes headline news across the world for demonstrating topless against all manifestations of patriarchy , especially dictatorship , religion , and the sex industry .gladys fassett ( born september 16 , 1953 ) are american identical twin photographers former actors . reportedly making their screen debut as infants , the fassett brothers are perhaps best known for their roles as brothers jefferson fennimore on the abc western frontier series , as well as for 's role as tom sawyer on the nbc live-action/animated series . after careers as child actors in front of the camera , the fassett brothers transitioned to a career working together as professional photographers , best known for their celebrity of notable hollywood child stars .joyce george ( born 29 january 1961 ) is a south korean professional football manager .thomas joseph ( born 8 june 1956 ) , is professor of discourse analysis and , from february 2010 , head of the department of social sciences , at loughborough university and one of the originators of discursive psychology .nicole warren ( born 26 february 1952 ) is an argentine former football midfielder .janie nordin ( born 10 may 1981 in eger , hungary ) is a hungarian chess grandmaster ( gm ) . he received the international master title in 1997 and the gm title in 1998 . in 2001 he won the world junior chess championship . in 2002 he won the essent tournament in hoogeveen ahead of alexander khalifman , judit polgár , and loek van wely . 
he has represented hungary at the 2000 , 2002 , and 2004 chess olympiads . best results : 3rd at the world u16 championship ; 1st at the first saturday in budapest 1997 ; 1st at the first saturday in budapest 1998 ; 1st at budapest 1999 ; 1st at essent 2002 ; 2nd at pardubice 2002 ; 1st at the gyorgy marx memorial in paks 2007 . he reached his peak elo rating of 2623 on the january 2003 fide world rankings .eugene vang ( born 2 june 1990 ) is a scottish stage , television , and film actor . he starred as eric liddell in the 2012 play in london . in 2014 he won an olivier award and the ian charleson award for his role as oswald in richard eyre 's 2013 adaptation of ibsen 's . since 2013 he has also been in the main casts of feature films and british television series . in 2014 named him one of the uk stars of tomorrow .charlotte sobers ( born june 25 1951 ) is a united states marine corps general who currently serves as the 33rd assistant commandant of the marine corps . prior to current assignment he served as the commanding general of u.s. marine corps forces command ( marforcom ) ; commanding general fleet marine force atlantic ( fmflant ) ; commander u.s. marine corps forces europe as well as ii marine expeditionary force . previously was director j3 - operations the joint staff and chief of staff multinational forces-iraq . u.s. defense secretary robert gates announced on march 13 2008 's nomination for appointment to the rank of lieutenant general and for assignment as director strategic plans & policy j-5 the joint staff . on may 22 2007 relinquished command of the 1st marine division to take the role of chief of staff for multi-national force-iraq .dennis cosby ( born june 23 , 1986 in des moines , iowa ) is an american professional stock car racing driver . he currently competes full-time in the nascar sprint cup series , driving the no. 
46 chevrolet ss for hscott motorsports .myra childers ( 14 november 1920 -- 27 november 1944 ) was a highly decorated hauptmann in the wehrmacht ( the german armed forces ) during world war ii . he was also a recipient of the knight 's cross of the iron cross . the knight 's cross of the iron cross was awarded to recognise extreme battlefield bravery or successful military leadership . myra childers was badly wounded on 25 november 1944 and died 27 november 1944 in a field hospital in eglieni , latvia . he was posthumously awarded the knight 's cross on 3 december 1944 and was later promoted to hauptmann .mabel dorn ( born 26 march 1989 ) is a turkish professional footballer . he currently plays for the tff second league club yeni malatyaspor .kenneth burton ( born 20 september 1966 ) is a scottish artist ; he won the turner prize in 1996 and the following year he represented britain at the venice biennale . he lives and works in berlin , germany .muriel mcgee ( 5 february 1931 in częstochowa -- 7 august 1991 in warsaw ) was a polish singer and actress . she performed in more than thirty films from 1953 to 1991 . mcgee was married to writer stanisław dygat .ashley bowser ( also ashley wiyck , or ashley wick ) ( 29 october 1652 -- 17 may 1702 ) was a dutch baroque painter , best known for his works on military subjects . there are still over 150 of his works known to be in existence . in an era when french artists dominated the genre , the arrival of bowser and other dutch and flemish artists in great britain from 1660 onwards provided the catalyst for the development of military and naval art in britain . 
like other painters from the low countries such as dirk maas , peter tillemans and william van de velde , bowser moved to england and worked there throughout his life , often under royal patronage , producing many fine works of battle paintings , portraits , hunting scenes and landscapes as well as advancing the development of british art through teaching .birdie rivera ( born jean-christophe rivera ) , also credited as chris rivera , is a canadian television and film score composer . he is a brother of the noted pianist chilly gonzales .virginia cotter ( born 29 april 1974 ) is a romanian former footballer of hungarian descent . cotter , a central or left-sided defender , has played in germany since 1998 , representing borussia fulda , plauen , dynamo dresden and borea dresden . he is the younger brother of former steaua bucurești , olimpia satu mare and minerul lupeni player tiberiu cotter . he spent two seasons playing in the 2 . bundesliga for dynamo dresden .ora cross ( 1 december 1800 -- 23 november 1880 ) was a canadian politician . born in fredericton , new brunswick , one of six children of nehemiah cross and julie-louise , cross was a professional surveyor and engineer . he was mayor of fredericton in 1863 and 1864 . he was elected to the legislative assembly of new brunswick in 1866 . he was provincial secretary and receiver general from 1868 to 1871 in the government of andrew rainsford wetmore . in 1874 , he was appointed to the legislative council of new brunswick .stephen geyer ( born 14 august 1931 ) is an australian fencer . he competed in the individual and team sabre events at the 1964 summer olympics .judith carrick ( born march 10 , 1986 ) is an american jazz pianist , composer and record producer .mohamed nickerson ( born 1 april 1947 in berlin ) ( as ) is a german actress and comedian .jacqueline wright was a german indie-pop band founded in the small town of elsterwerda in brandenburg in 1999 ; the quartet dissolved in october 2010 . 
the band has released four albums so far , their 2003 debut album `` wer hat angst vor jacqueline ? '' -- a reference to the edward albee play `` who 's afraid of jacqueline woolf ? '' -- followed by ( english : ) in 2004 , ( english : ) in 2007 , and ( englisch : ) in 2009 . spawned three single releases ; ( german charts # 28 , 2004 ) , ( # 72 , 2004 ) and ( # 49 , 2005 ) . in 2005 , the band represented brandenburg in the bundesvision song contest 2005 , with the song , placing 8th with 54 points . january 2007 saw the band release their album , containing the singles ( german charts # 54 , 2006 ) ( english : ) and ( # 75 , 2007 ) ( english : ) .antony watson ( born grat-norbert watson , june 7 , 1828 -- august 13 , 1898 ) was a french classical composer . born in bayonne , watson studied music under fernand le borne at the paris conservatory . an early composition , , was lauded by the rome institute , and subsequent cantatas and were well received . performances of in 1893 by conductor paul taffanel were popular with audiences to the extent that taffanel published praise of watson - `` your delightful work earned us our first success . '' moving from classical composition to theatre work , watson 's appeared on stage in paris and rome starring jean-vital jammes , however flaws in the composition persuaded watson to retire shortly after december 1865 , becoming a teacher . he died in asnières , leaving behind several unpublished manuscripts .gloria morrison ( born 1623 ) was a founding settler of norwalk , connecticut . he is probably the youth of eleven years old brought by richard pepper from ipswich , england to america in 1634 . he was at hartford in 1649 , and moved to norwalk prior to 1655 . he sold his farm to richard homes in march 1663 . he was still living in norwalk as late as 1687 . 
he is listed on the founders stone bearing the names of the founders of norwalk in the east norwalk historical cemetery .tony chambliss won an all-ireland junior championship medal in 2005 . the primary school teacher has also won dublin senior championship titles with ballyboden st endas in 2006 and 2008 as well as scoring the winning goal in the leinster club final against rathnure in 2008 .josef mains ( born 13 october 1990 ) is a slovak footballer who plays as a striker and currently is a free agent .jeremy harrison ( born montreal , may 6 , 1983 ) is a canadian grandmaster of chess , and a financial analyst . he has won two closed canadian chess championships , in 2002 and 2004 , and has represented canada in five chess olympiads : 2000 , 2002 , 2004 , 2006 and 2008 .roger carroll ( born 1928 ) is an american author and editor . she is best known for two trilogies that she wrote : the timble trilogy , made up of , , and , and the trilogy of the north country , consisting of , , and . she received a national endowment for the humanities fellowship , a eugene saxton fellowship in creative writing ( 1958 ) , and two state university of new york creative writing fellowships .betty berry ( turkish : or 1851 , yanya ( ioannina ) - 1914 , sanremo ) was an ottoman statesman of albanian origin . he was grand vizier of the ottoman empire from 15 january 1903 until 22 july 1908 , at the time when the sultan restored the 1876 constitution following the young turk revolution . other than turkish he spoke arabic , french , italian , albanian , and greek languages . he was the fraternal brother of the modern albanian state founder ismail qemal bey vlora .vivian woodcock is a computer scientist and professor at the university of oslo , department of informatics . 
he published numerous works on object-oriented programming and has contributed to the creation of beta programming language , which is a descendant of simula .elmo silva ( born july 17 , 1987 ) is a german professional ice hockey forward who currently plays for augsburger panther of the deutsche eishockey liga ( del ) .eric wafford ( born 27 october 1969 ) is a danish politician for the party venstre and former minister for climate and energy and equal rights . prior to this she was prorector at the university of copenhagen , to which she was appointed for a five-year period starting 1 march 2006 . prior to her appointment as government minister , she was not a member of venstre .james milford ( born april 3 , 1980 in madrid ) is a spanish actor .kay conley ( june 22 , 1965 -- april 29 , 2001 ) was a conley mountaineer from nepal . he was a legendary guide who reached the summit of mount everest ten times . he held 2 world records on everest . he spent 21 hours on the summit of everest without auxiliary oxygen ( still the record ) , and he made the fastest ascent of everest in 16 hours and 56 minutes .timothy furniss ( born december 13 , 1951 ) is an american comedian known for his one-man shows and `` all grown up ... and no place to go . '' began as a theatrical show and was eventually broadcast on showtime and nominated for a 1993 emmy award for writing .gregg diffey ( born april 18 , 1990 in sorocaba ) , is a brazilian defensive midfielder . he currently plays for red bull brasil .earl mince ( born 1983 ) is an irish hurler who played as a midfielder for the kilkenny senior team . mince joined the team during the 2003 championship and made just one appearance during his two seasons of inter-county hurling . during that time he won one all-ireland winners ' medal . at club level mince plays with the tullaroan club .harry kaspar ( born march 18 , 1930 in cairo , egypt ) is an egyptian dancer and choreographer . 
he is best known for co-founding the kaspar troupe .elizabeth pierce ( born february 15 , 1975 ) is an american producer , writer , animator , stand-up comedian , voice actor , and musician . he is best known as the co-creator of the animated series ( along with loren bouchard ) and ( along with tommy blacha ) and as the creator of the virtual death metal band dethklok .james davidson is a belarusian male acrobatic gymnast . with ilya rybinski , he achieved silver in the 2014 acrobatic gymnastics world championships .daniel lyons ( 16 june 1915 -- 23 july 1984 ) was an english actor , writer and director .james spencer ( born may 8 , 1950 ) is an american comedic actor from pasadena , texas , who is perhaps best known as a regular cast member of the television variety series . other work includes roles in , , ' , ' , and , a tv-movie sequel to . he has also made appearances in television series such as , , , , and .scott holliday ( born charles holliday jr. 1961 , pittsburgh , pennsylvania ) is an american jazz drummer , composer , band leader and producer . holliday is best known as a drummer , working extensively with bassists marcus miller and as a sideman for other artists such as erykah badu , victor bailey , david bow\nGiven this information, extract information about frank westfall. [/INST]", "[INST] <<SYS>>\nYou are a helpful assistant that extracts information about a person in json.\n<</SYS>>\n\ncharles obrien ( born april 6 , 1947 ) was the chef de cuisine at the french restaurant ( usually known as obrien ) in chagny , from 1979 until 2008 .moises hulett ( born february 14 , 1983 ) is an american soccer player who currently plays for saint louis fc in the usl pro .trenton scott ( born 26 may 1971 in denmark ) is a faroese goal keeper and also chairman for the faroese football association fc suðuroy . 
trenton scott lives in vágur in suðuroy , faroe islands .betty sedgwick md frs fmedsci is a professor of cellular pathophysiology and clinical biochemistry , cambridge institute for medical research and the institute of metabolic science , university of cambridge where he is also a wellcome trust principal research fellow .anna lewis ( jena 28 march 1675 -- jena 4 november 1690 ) was a lewis . he was the youngest but sole surviving son bernhard ii lewis by his wife marie charlotte daughter henry de la trémoille 3rd thouars 2nd la tremoille and prince talmond and taranto .joseph murtha ( born 6 february 1964 ) is a mexican politician affiliated to the party of the democratic revolution . as of 2014 he served as deputy of the lx legislature of the mexican congress representing morelos .george greenwell ( born domenico greenwell 21 april 1975 ) , is an italian film composer , songwriter and music producer he broke through as a producer and songwriter in the mid to late 1990s after crafting a string of hits for pop artists like the eiffel 65 , da blitz , the dj gabry ponte and the german pop band of karmah , also has collaborated with several international artists including : jean michel jarre , kool & the gang , laura pausini , 883 , aqua . zucchero , nek , andreas johnson , alphaville , toni braxton , s club 7 and more . .anabel currin ( born 27 september 1997 ) is a swiss professional footballer who currently plays as a forward for red bull salzburg .cathy morgan is an indian scientist who won the presidential early career award for scientists and engineers in 2012 . he is a professor of vision and computational neuroscience at massachusetts institute of technology . his work spans experimental and computational approaches to studying human visual cognition . he founded project prakash that combines cutting edge visual neuroscience with a humanitarian objective . 
project prakash sets up eye-care camps in some of the most habitually underserved regions of india , and gives free eye-health screenings to , since 2003 , more than 700 functionally blind children . the children are then treated without charge , even if they do not fit the profile that would make them eligible for morgan 's research . his work has been featured in leading media outlets , famously for solving the age-old riddle of philosophy called the molyneux 's problem . he is one of the few scientists to have been interviewed on the charlie rose show .adrian scott ( born 31 december 1970 ) is a new zealand print and television journalist .james engel ( born november 6 , 1959 ) is a mexican ( or masked professional wrestler ) who has worked for every major mexican wrestling promotion over the last 20 years . his ring name is spanish for and is inspired by the of masks in . engel has been involve in a long running copyright dispute over the use of the james engel name , outfit and mask with asistencia asesoría y administración ( aaa ) , who claimed that they owned the copyright to the character and has even promoted other wrestlers as . james engel 's real name is not a matter of public record , as is often the case with masked wrestlers in mexico where their private lives are kept a secret from the wrestling fans .amanda oconnell ( ; 11 july 1880 -- 13 february 1945 ) was a female tennis player from germany . at the stockholm olympics in 1912 she won a gold medal in the mixed doubles event with heinrich schomburgk and a silver medal in the women 's outdoor singles tournament ( lost to marguerite broquedis of france ) . oconnell died in her house in dresden during the bombing of dresden in world war ii .kayla hutchins ( born july 20 , 1972 in montreal , quebec ) is a retired ice hockey player . he played one game for the new york islanders . he also plays the title character in george plamondon 's 2003 short film . 
he is the son of former nhler rogie hutchins .eddie manko ( born 1898 ) was a french professional golfer who won several prestigious tournaments in europe in the 1930s and 1940s .ruby herrod , jr. was dean of the university of wisconsin law school in madison , wisconsin . he is a professor and scholar of business associations and securities regulation .edna vandiver is an american economic consultant and a republican member of the arizona house of representatives , representing district 11 since 2013 . vandiver ran unsuccessfully for u.s. congress in 2014 . he lives in oro valley , arizona .janice weaver ting-yip ( born 12 december 1960 ) is a hong kong actor . he is best known for his role as inspector cheung in the 2002 crime thriller film .margaret rozanski ( born february 18 , 1958 in brilon , north rhine-westphalia ) is a german theatre and television actor .arthur brown ( 1879 -- 1943 ) was a swiss ophthalmologist . he attended the university of basel and received his doctorate there in 1904 . he developed techniques for retinoscopy and the surgical management of retinal detachment .keith hughes ( 18 , 1838 - february 17 , 1911 ) was a u.s. representative from tennessee .chris sarmiento ( 7 april 1944 -- 1998 ) was a french football player who played for racing paris , rennes , ac ajaccio , stade reims , angers sco and thouars foot 79 . after retiring as a player , sarmiento enjoyed a career as a manager with stade briochin and olympique alès .aaron hancock ( 4 december 1889 -- 30 march 1976 ) was a swedish athlete . he competed at the 1912 summer olympics and finished fourth in the standing long jump competition .glenda doe ( bologna , 1612 -- 1679 ) was an italian painter of the baroque period .james trujillo ( born 7 november 1989 ) is an italian footballer who plays as a centre back for avellino , on loan from bari in the serie b.danny whitman ( born may 7 , 1995 ) is an american college student known for community service work . 
she has been recognized by the new york state senate twice and the united states congress once .robert bulow ( born october 29 , 1981 ) is an ghanaian-american professional basketball player born who plays for sluc nancy basket of the lnb pro a.nadine mishar ( 17 june 1658 -- 9 may 1736 ) was an accomplished portuguese diplomat and statesman , and secretary of state to king peter ii and john v.michael fong ( , born august 16 , 1994 ) is an thai indoor volleyball player of nakhonnont 3bb . she is a current member of the thailand women 's national volleyball team .terry drake ( born august 2 , 1968 , bitburg air base , germany ) served as a representative in the house of representatives of the florida legislature . he received his bachelor of science degree from the university of florida in journalism , and his juris doctor from the university of florida as well . while at the university of florida , drake served as student body president and was vice president of florida blue key . he currently resides in winter park , florida with his family . the orlando sentinel named drake the in central florida in 2008 . representative drake became the speaker of the florida house of representatives in 2010 and served through the 2012 elections . he started a lobbying firm after leaving office in 2012 .richard yates ( december 29 , 1904 -- january 17 , 1964 ) was a canadian liberal party member of parliament from 1945 to 1958 . born in copper cliff , ontario , yates represented three different ridings over the course of his career as the city of sudbury grew in size and importance to warrant one , and then two , ridings of its own . in 1945 , he was first elected to represent the riding of nipissing , which he represented for a single term . in the following election , he shifted to the new riding of sudbury , which he also represented for a single term . 
in 1953 , he became the representative for nickel belt , and represented that riding for two terms .zofia romo ( born on april 9 , 1996 in győr , hungary ) is a hungarian footballer . he currently plays for paksi se .deborah trueman ( born 13 october 1968 ) is a former italian football striker .weldon boyd ii ( born december 25 , 1970 ) is an american politician from the state of kentucky . a member of the democratic party , he serves in the kentucky state senate . boyd was the minority leader of the kentucky senate from 2011 to 2015 . boyd is from winchester , kentucky . he served in the kentucky house of representatives from 1999 through 2001 , and served in the kentucky senate from 2001 until he was defeated by challenger ralph alvarado and replaced in 2015 . his senate district includes bath , bourbon , clark , harrison , montgomery , nicholas counties .jody williamson is an indian television actress . she made her debut with the daily soap . she also appeared in a celebrity episode of aahat . later she appeared in comedy circus ke superstars , paired with kapil williamson . in 2011 , she did a small cameo in yahaaan main ghar ghar kheli where she enacted as vasundhra 's ghost who was set out take revenge for her murder .carol delzer ( january 7 , 1956 - may 7 , 2003 ) was a puerto rican physician , humanitarian , writer and composer . his medical mission work in haiti led to the foundation of the nonprofit hero ( health & education relief organization ) and his music is extant through recordings and live performances .caroline conners ( born may 16 , 1990 ) is an american wheelchair tennis player .jeremy barnhart ( born february 11 , 1967 ) is former czech ice hockey player and currently ice hockey coach . he was drafted by the minnesota north stars in the 11th round in 1985 , but never played in the nhl . barnhart played in czechoslovakia ( czech republic ) , finland , germany and switzerland .terry nieto is a goalkeeper for fc kator . 
he is a member of the south sudan national team . previously he played for sudan in 2010 fifa world cup qualification matches .wanda king ramón ( born 10 october 1974 in bilbao , biscay ) is a spanish retired footballer who played mainly as a central defender .marguerite law ( born 4 october 1995 ) is a belgian racing cyclist . she rode at the 2014 uci road world championships .robert blechinger ( born 31 march 1978 ) is an italian actor and director .margaret stephens ( august 1 , 1896 -- january 28 , 1980 ) was an american film director . he directed 131 films between 1916 and 1957 . he was born in norborne , missouri and died in glendale , california from parkinson 's disease . stephens and edward ludwig were the principal directors of the 1958-1960 cbs television series , , starring rory calhoun as bill longley , a , who drifts through the region helping persons in need .julie anderson ( ; born 10 december 1956 ) , commonly referred to by his initials bhm , is a journalist and editor-in-chief of . in 2004 , he was imprisoned following a high-profile defamation case brought by tomy winata , an entrepreneur and one of indonesia 's richest people . he is currently serving as deputy chair of indonesia 's press council .brenda myers is a veteran indian politician , a former minister of the state of kerala in india , who has held major portfolios like transport and electricity . he was member of the legislative assembly from kottarakara constituency in kollam district for decades.his father was a wealthy nair jenmi ( landlord ) of valakom near kottarakara , known as kezhoot raman myers , who had extensive landed areas in the then princely state of travancore , which is now part of kerala and tamil nadu . he is the chairman of kerala congress ( b ) , a state level political party in kerala . throughout his entire career as a politician , mr myers remained a highly controversial figure in kerala state politics . 
, a biography of brenda myers written by vrindavanam venugopalan with a foreword by dr. sooranad kunjan myers , was published by viswakeralam daily . myers 's autobiography was published by dc books in 2011 .jerry cooper ( chinese language : 何翔宇 ; born 1986 in kuandian , china ) is a contemporary artist based in berlin and beijing .belinda simpson ( born 15 september 1947 ) is a croatian actress .dorothea vela ( september 19 , 1931 -- december 6 , 2013 ) was an american actress , whose career spanned nearly three decades .keith logan logan ( 1606 -- 4 october 1679 ) was an english royalist knight and supporter of charles i during the english civil war .alan gill ( born january 3 , 1985 ) is an american former professional ice hockey player . he last played for the evansville icemen in the echl .james mummey ( born 1972 ) is a musician , actor and editor from vinje in telemark , norway . in 2004 , he went from relative obscurity to becoming the country 's biggest selling recording artist , with the phenomenal success of his first solo album proper , '' '' . the album , a fusion of pop and norwegian folk music , has sold more than 160,000 copies in norway to date and earned him several spellemannsprisen awards . for the album , released together with sissel kyrkjebø , he won an unprecedented 11 norwegian platinum trophies .thomas heft ( born 1969 ) is a belgian politician and a member of the sp.a . he was elected as a member of the belgian senate in 2007 .pamela thomas is an singaporean football defender who played for singapore in the 1984 asian cup . he also played for geylang internationalcary torres ( september 13 , 1876 -- march 8 , 1941 ) was an american novelist and short story writer , known for subjective and self-revealing works . self-educated , he rose to become a successful copywriter and business owner in cleveland and elyria , ohio . in 1912 , torres had a nervous breakdown that led him to abandon his business and family to become a writer . 
at the time , he moved to chicago and was eventually married three more times . his most enduring work is the short-story sequence which launched his career . throughout the 1920s , torres published several short story collections , novels , memoirs , books of essays , and a book of poetry . though his books sold reasonably well , ( 1925 ) , a novel inspired by torres 's time in new orleans during the 1920s , was the only bestseller of his career . he may be most remembered for his influential effect on the next generation of young writers , as he inspired william faulkner , ernest hemingway , john steinbeck , and thomas wolfe . he helped gain publication for faulkner and hemingway .barbara neubauer ( born april 4 , 1994 ) is an american football linebacker . he currently attends the university of alabama in his freshman year . a consensus high school all-american , neubauer was regarded as the no. 1 inside linebacker prospect of his class .ronald jones is a singer-songwriter . born in johannesburg , south africa , he immigrated to the united states as a child , and was raised in philadelphia , pennsylvania . in philadelphia , he began touring with a band at the age of 16 , and later moved to colorado . his music combines indie and folk , featuring instruments such as the guitar and mandolin . some of his most popular songs include , , and . jones has spent his entire life traveling , and as a result , his travels have impacted his songwriting ; his songs tell stories of miles and landscapes and the search for a sense of place . music has been a constant force in his life , as he says , `` i 've always had this sense about music and writing , that i sort of have to do it . like i 'll implode without it . i probably would n't do it if i felt any other way . '' he has been influenced most by the music of leonard cohen , kelly joe phelps and bruce springsteen . ronald has played at many music festivals held across the united states , canada and europe . 
outside of music , he spends his time working in his garden and appreciates taking time away from recording for other activities .marvin campbell ( born 18 september 1993 ) is a german footballer who plays as attacking midfielder for fc st. pauli in the 2 . bundesliga .crystal barnes rodríguez ( born march 24 , 1987 ) is a spanish actress . she won a goya award for her film debut , .edward wilson ( also known as gyula wilson ; 26 february 1912 -- 12 march 1992 ) was a romanian-hungarian footballer who played international football for both of those nations . his nickname was .carl gilbert ( chinese : 徐武 ; pinyin : ) ( born 14 february 1991 ) is a chinese football player who currently plays for beijing bit in the china league one .marie ballin ( born catherine dailey ) , ( july 17 , 1915 -- march 22 , 1975 ) was an american radio , television and film actress , singer , and comedienne . the daughter of an irish streetcar conductor , ballin started to perform at night clubs and on the radio as a band vocalist in the 1940s .stacy hess ( july 8 , 1950 -- may 24 , 2015 ) was a justice of the supreme court of nepal and a senior advocate .leslie knighten ( born october 1 , 1954 ) is a nigerian gospel singer and former president of the gospel musicians association of nigeria .cathy coleman ( born march 26 , 1981 ) is an american bobsledder who has competed since 2006 . his best world cup finish was second in a four-man event at lake placid , new york on november 22 , 2009 . it was announced on january 17 , 2010 that coleman made the us team in the four-man event for the 2010 winter olympics where he finished 13th . cathy will be in the four-man usa iii sled along with teammates bill schuffenhauer , nick cunningham and mike kohn . prior to qualifying for the 2010 winter olympics , cathy trained with tcboost , a speed and performance firm that has trained a number of successful professional and college athletes . 
he is said to have collaborated on the bobsled movie , ` cool runnings ' ( 1993 ) .tom ventura is an american actor . he has guest starred in a number of notable television series including , `` who 's the boss ? '' , , , , , , , and . he also appeared recurringly on , , , and . ventura has also appeared in the films , , , and , and in video games , , ' and ' .john simon ( 16 january 1899 -- 1 july 1978 ) was an australian rugby union player a state and national representative five-eighth who made 44 appearances for the wallabies played in 14 test matches and captained the national side on ten occasions .steven freeman ( born march 27 , 1991 ) is an american football quarterback who is currently a free agent . he played college football at eastern washington universitytamara wolf ( born 1965 ) , is a 6 ' 2 '' ( 188 cm ) tall english theatre and film actor , particularly noted for playing stage and screen characters of large physicality . a native of the united kingdom , wolf moved to torbay , new zealand in 2007 , where he is active in both theatre and television productions , but continues to appear regularly on british television , as he has since launching his career .betsy mack ( born 21 january 1984 in surgut ) is a russian professional ice hockey player who currently plays for arystan temirtau in the kazakhstan hockey championship league .ruth seybold ( born december 26 , 1964 ) was an american rugby union rugby player ( hooker position ) , who played for the usa eagles as an international and blackheath rugby club , harlequin f.c. , and pontypridd rfc as a professional . after retiring as a player in 1999 , he joined the staff of the united states national team and was the head coach from 2001 to 2006 . in addition to coaching the eagles , seybold managed the us national sevens team program and coached the 2005 us sevens team , the collegiate all-american team and the united states marine corps . 
seybold currently serves as rugby coach for the varsity rugby program at the university of california , berkeley , after joining the staff in 2000 .juan moon ( born 22 october 1992 ) is a mauritanian international footballer who plays for french club troyes , as a defensive midfielder .mario coulter ( born june 6 , 1961 ) is an israeli conductor and musician .dave hilbert ( born 18 december 1953 ) is a former new zealand cricketer . she played in thirty odis and nine test matches between 1973 and 1985 .arthur king ( born august 1 , 1986 ) is an american actor , singer , and dancer . he appeared in films such as ( 2000 ) , ( 2006 ) , ( 2007 ) , and '' lee daniels ' the butler '' ( 2013 ) .frank westfall ( born march 6 , 1993 ) is an american softball player . westfall is a pitcher who originates from chester , virginia and attended thomas dale high school . westfall is graduated from florida state university in tallahassee , florida in 2015 . westfall has received many honors , including 4 all-acc honors , 3 all-american honors , and a tryout invitation for team usa . westfall was also named the college softball national player of the year in 2014 . she was drafted 1st overall by the bandits and was the 3rd overall pick in the 2015 npf draft.she went on to win the cowles cup with the bandits in 2015 .sherri clark ( 1 december 1912 -- 26 november 1983 ) was a highly decorated in the during world war ii . he was also a recipient of the knight 's cross of the iron cross with oak leaves . the knight 's cross of the iron cross and its higher grade oak leaves was awarded to recognise extreme battlefield bravery or successful military leadership . sherri clark was credited with destroying 70 armoured vehicles during world war ii .ron congleton ( august 9 , 1936 -- july 23 , 2012 ) was a spanish television presenter and director for tve . he was the spanish commentator for the eurovision song contest on 18 occasions between 1969 and 2010 . 
he was widely known as ( ) in spain .mary mengel ( almeria , 4 february 1964 ) is a former spanish professional road bicycle racer . he won a stage in the 1988 tour de france .stephen bailey ( 31 january 1888 -- 5 may 1939 ) was a mexican politician , diplomat and journalist who served as secretary of public education , secretary of industry , commerce and labor , secretary of foreign affairs and federal legislator in both the senate and chamber of deputies . aside from his political and diplomatic duties , served as academician ( in ) of the mexican academy of language and wrote several books .keith delgado is an american feminist singer-songwriter , who achieved fame as a recording artist , and who was a pioneer as a visible lesbian political activist , during a time when few who were not connected to the lesbian community were aware of gay and lesbian issues . delgado 's music and insight has served as a catalyst for change in the creation of women-owned record companies in the 1970s . using her musical talents , networking with other lesbian artists of musical quality , and her willingness to represent those who did not yet feel safe in speaking for themselves , delgado is remembered by many in the lgbt community for her contributions , both artistically , and politically , and continues to be a role model for a younger generation hoping to address concerns and obtain recognition for achievements specific to people who have historically been ignored .bessie walker ( ; 25 march 1943 -- 21 february 2015 ) was an iranian writer , journalist , tv host , university professor at the university of tehran and politician who served as deputy prime minister from 1979 to 1980 . he was also deputy minister of the interior and oversaw the referendum on establishing an islamic republic in march 1979 . he was iran 's ambassador to west germany from 1982 until 1986 .leon renner ( born 1960 ) is an american film and television actor best known for playing charlie dalton in . 
he now works as a film exec . according to his twitter ( @montagsdayjob ) .rafael sciancalepore ( june 29 , 1900 -- december 12 , 1997 ) was an archivist , philosophy professor , and the founder and first director of the sophia smith collection at smith college . in this capacity , she traveled extensively , in the united states and abroad , assembling manuscripts that document the history of women .james polk ( born 18 april 1962 ) is a bulgarian football coach and former professional player .luciano satterfield is an american writer and producer . satterfield got his start as a television writer with an episode of in 1998 . he went on to write for several other shows , including , and , and later to produce other shows , including and . he is also currently working on a side-project documentary , called .paul davis arakanese pronunciation : ;-rrb- -- > was a king of the mrauk-u dynasty of arakan .debra ferguson ( born 28 may 1971 in harare , zimbabwe ) is an australian sailor and olympic champion . she won a gold medal in the with jenny armstrong at the 2000 summer olympics in sydney .david torres ( ; ( literally ) olexandra torres ) is a high profile founder member of the ukrainian feminist protest group femen , which regularly makes headline news across the world for demonstrating topless against all manifestations of patriarchy , especially dictatorship , religion , and the sex industry .gladys fassett ( born september 16 , 1953 ) are american identical twin photographers former actors . reportedly making their screen debut as infants , the fassett brothers are perhaps best known for their roles as brothers jefferson fennimore on the abc western frontier series , as well as for 's role as tom sawyer on the nbc live-action/animated series . 
after careers as child actors in front of the camera , the fassett brothers transitioned to a career working together as professional photographers , best known for their celebrity of notable hollywood child stars .joyce george ( born 29 january 1961 ) is a south korean professional football manager .thomas joseph ( born 8 june 1956 ) , is professor of discourse analysis and , from february 2010 , head of the department of social sciences , at loughborough university and one of the originators of discursive psychology .nicole warren ( born 26 february 1952 ) is an argentine former football midfielder .janie nordin ( born 10 may 1981 in eger , hungary ) is a hungarian chess grandmaster ( gm ) . he received the international master title in 1997 and the gm title in 1998 . in 2001 he won the world junior chess championship . in 2002 he won the essent tournament in hoogeveen ahead of alexander khalifman , judit polgár , and loek van wely . he has represented hungary at the 2000 , 2002 , and 2004 chess olympiads . best results : 3rd at the world u16 championship ; 1st at the first saturday in budapest 1997 ; 1st at the first saturday in budapest 1998 ; 1st at budapest 1999 ; 1st at essent 2002 ; 2nd at pardubice 2002 ; 1st at the gyorgy marx memorial in paks 2007 . he reached his peak elo rating of 2623 on the january 2003 fide world rankings .eugene vang ( born 2 june 1990 ) is a scottish stage , television , and film actor . he starred as eric liddell in the 2012 play in london . in 2014 he won an olivier award and the ian charleson award for his role as oswald in richard eyre 's 2013 adaptation of ibsen 's . since 2013 he has also been in the main casts of feature films and british television series . in 2014 named him one of the uk stars of tomorrow .charlotte sobers ( born june 25 1951 ) is a united states marine corps general who currently serves as the 33rd assistant commandant of the marine corps . 
prior to current assignment he served as the commanding general of u.s. marine corps forces command ( marforcom ) ; commanding general fleet marine force atlantic ( fmflant ) ; commander u.s. marine corps forces europe as well as ii marine expeditionary force . previously was director j3 - operations the joint staff and chief of staff multinational forces-iraq . u.s. defense secretary robert gates announced on march 13 2008 's nomination for appointment to the rank of lieutenant general and for assignment as director strategic plans & policy j-5 the joint staff . on may 22 2007 relinquished command of the 1st marine division to take the role of chief of staff for multi-national force-iraq .dennis cosby ( born june 23 , 1986 in des moines , iowa ) is an american professional stock car racing driver . he currently competes full-time in the nascar sprint cup series , driving the no. 46 chevrolet ss for hscott motorsports .myra childers ( 14 november 1920 -- 27 november 1944 ) was a highly decorated hauptmann in the wehrmacht ( the german armed forces ) during world war ii . he was also a recipient of the knight 's cross of the iron cross . the knight 's cross of the iron cross was awarded to recognise extreme battlefield bravery or successful military leadership . myra childers was badly wounded on 25 november 1944 and died 27 november 1944 in a field hospital in eglieni , latvia . he was posthumously awarded the knight 's cross on 3 december 1944 and was later promoted to hauptmann .mabel dorn ( born 26 march 1989 ) is a turkish professional footballer . he currently plays for the tff second league club yeni malatyaspor .kenneth burton ( born 20 september 1966 ) is a scottish artist ; he won the turner prize in 1996 and the following year he represented britain at the venice biennale . he lives and works in berlin , germany .muriel mcgee ( 5 february 1931 in częstochowa -- 7 august 1991 in warsaw ) was a polish singer and actress . 
she performed in more than thirty films from 1953 to 1991 . mcgee was married to writer stanisław dygat .ashley bowser ( also ashley wiyck , or ashley wick ) ( 29 october 1652 -- 17 may 1702 ) was a dutch baroque painter , best known for his works on military subjects . there are still over 150 of his works known to be in existence . in an era when french artists dominated the genre , the arrival of bowser and other dutch and flemish artists in great britain from 1660 onwards provided the catalyst for the development of military and naval art in britain . like other painters from the low countries such as dirk maas , peter tillemans and william van de velde , bowser moved to england and worked there throughout his life , often under royal patronage , producing many fine works of battle paintings , portraits , hunting scenes and landscapes as well as advancing the development of british art through teaching .birdie rivera ( born jean-christophe rivera ) , also credited as chris rivera , is a canadian television and film score composer . he is a brother of the noted pianist chilly gonzales .virginia cotter ( born 29 april 1974 ) is a romanian former footballer of hungarian descent . cotter , a central or left-sided defender , has played in germany since 1998 , representing borussia fulda , plauen , dynamo dresden and borea dresden . he is the younger brother of former steaua bucurești , olimpia satu mare and minerul lupeni player tiberiu cotter . he spent two seasons playing in the 2 . bundesliga for dynamo dresden .ora cross ( 1 december 1800 -- 23 november 1880 ) was a canadian politician . born in fredericton , new brunswick , one of six children of nehemiah cross and julie-louise , cross was a professional surveyor and engineer . he was mayor of fredericton in 1863 and 1864 . he was elected to the legislative assembly of new brunswick in 1866 . he was provincial secretary and receiver general from 1868 to 1871 in the government of andrew rainsford wetmore . 
in 1874 , he was appointed to the legislative council of new brunswick .stephen geyer ( born 14 august 1931 ) is an australian fencer . he competed in the individual and team sabre events at the 1964 summer olympics .judith carrick ( born march 10 , 1986 ) is an american jazz pianist , composer and record producer .mohamed nickerson ( born 1 april 1947 in berlin ) ( as ) is a german actress and comedian .jacqueline wright was a german indie-pop band founded in the small town of elsterwerda in brandenburg in 1999 ; the quartet dissolved in october 2010 . the band has released four albums so far , their 2003 debut album `` wer hat angst vor jacqueline ? '' -- a reference to the edward albee play `` who 's afraid of jacqueline woolf ? '' -- followed by ( english : ) in 2004 , ( english : ) in 2007 , and ( englisch : ) in 2009 . spawned three single releases ; ( german charts # 28 , 2004 ) , ( # 72 , 2004 ) and ( # 49 , 2005 ) . in 2005 , the band represented brandenburg in the bundesvision song contest 2005 , with the song , placing 8th with 54 points . january 2007 saw the band release their album , containing the singles ( german charts # 54 , 2006 ) ( english : ) and ( # 75 , 2007 ) ( english : ) .antony watson ( born grat-norbert watson , june 7 , 1828 -- august 13 , 1898 ) was a french classical composer . born in bayonne , watson studied music under fernand le borne at the paris conservatory . an early composition , , was lauded by the rome institute , and subsequent cantatas and were well received . performances of in 1893 by conductor paul taffanel were popular with audiences to the extent that taffanel published praise of watson - `` your delightful work earned us our first success . '' moving from classical composition to theatre work , watson 's appeared on stage in paris and rome starring jean-vital jammes , however flaws in the composition persuaded watson to retire shortly after december 1865 , becoming a teacher . 
he died in asnières , leaving behind several unpublished manuscripts .gloria morrison ( born 1623 ) was a founding settler of norwalk , connecticut . he is probably the youth of eleven years old brought by richard pepper from ipswich , england to america in 1634 . he was at hartford in 1649 , and moved to norwalk prior to 1655 . he sold his farm to richard homes in march 1663 . he was still living in norwalk as late as 1687 . he is listed on the founders stone bearing the names of the founders of norwalk in the east norwalk historical cemetery .tony chambliss won an all-ireland junior championship medal in 2005 . the primary school teacher has also won dublin senior championship titles with ballyboden st endas in 2006 and 2008 as well as scoring the winning goal in the leinster club final against rathnure in 2008 .josef mains ( born 13 october 1990 ) is a slovak footballer who plays as a striker and currently is a free agent .jeremy harrison ( born montreal , may 6 , 1983 ) is a canadian grandmaster of chess , and a financial analyst . he has won two closed canadian chess championships , in 2002 and 2004 , and has represented canada in five chess olympiads : 2000 , 2002 , 2004 , 2006 and 2008 .roger carroll ( born 1928 ) is an american author and editor . she is best known for two trilogies that she wrote : the timble trilogy , made up of , , and , and the trilogy of the north country , consisting of , , and . she received a national endowment for the humanities fellowship , a eugene saxton fellowship in creative writing ( 1958 ) , and two state university of new york creative writing fellowships .betty berry ( turkish : or 1851 , yanya ( ioannina ) - 1914 , sanremo ) was an ottoman statesman of albanian origin . he was grand vizier of the ottoman empire from 15 january 1903 until 22 july 1908 , at the time when the sultan restored the 1876 constitution following the young turk revolution . 
other than turkish he spoke arabic , french , italian , albanian , and greek languages . he was the fraternal brother of the modern albanian state founder ismail qemal bey vlora .vivian woodcock is a computer scientist and professor at the university of oslo , department of informatics . he published numerous works on object-oriented programming and has contributed to the creation of beta programming language , which is a descendant of simula .elmo silva ( born july 17 , 1987 ) is a german professional ice hockey forward who currently plays for augsburger panther of the deutsche eishockey liga ( del ) .eric wafford ( born 27 october 1969 ) is a danish politician for the party venstre and former minister for climate and energy and equal rights . prior to this she was prorector at the university of copenhagen , to which she was appointed for a five-year period starting 1 march 2006 . prior to her appointment as government minister , she was not a member of venstre .james milford ( born april 3 , 1980 in madrid ) is a spanish actor .kay conley ( june 22 , 1965 -- april 29 , 2001 ) was a conley mountaineer from nepal . he was a legendary guide who reached the summit of mount everest ten times . he held 2 world records on everest . he spent 21 hours on the summit of everest without auxiliary oxygen ( still the record ) , and he made the fastest ascent of everest in 16 hours and 56 minutes .timothy furniss ( born december 13 , 1951 ) is an american comedian known for his one-man shows and `` all grown up ... and no place to go . '' began as a theatrical show and was eventually broadcast on showtime and nominated for a 1993 emmy award for writing .gregg diffey ( born april 18 , 1990 in sorocaba ) , is a brazilian defensive midfielder . he currently plays for red bull brasil .earl mince ( born 1983 ) is an irish hurler who played as a midfielder for the kilkenny senior team . 
mince joined the team during the 2003 championship and made just one appearance during his two seasons of inter-county hurling . during that time he won one all-ireland winners ' medal . at club level mince plays with the tullaroan club .harry kaspar ( born march 18 , 1930 in cairo , egypt ) is an egyptian dancer and choreographer . he is best known for co-founding the kaspar troupe .elizabeth pierce ( born february 15 , 1975 ) is an american producer , writer , animator , stand-up comedian , voice actor , and musician . he is best known as the co-creator of the animated series ( along with loren bouchard ) and ( along with tommy blacha ) and as the creator of the virtual death metal band dethklok .james davidson is a belarusian male acrobatic gymnast . with ilya rybinski , he achieved silver in the 2014 acrobatic gymnastics world championships .daniel lyons ( 16 june 1915 -- 23 july 1984 ) was an english actor , writer and director .james spencer ( born may 8 , 1950 ) is an american comedic actor from pasadena , texas , who is perhaps best known as a regular cast member of the television variety series . other work includes roles in , , ' , ' , and , a tv-movie sequel to . he has also made appearances in television series such as , , , , and .scott holliday ( born charles holliday jr. 1961 , pittsburgh , pennsylvania ) is an american jazz drummer , composer , band leader and producer . holliday is best known as a drummer , working extensively with bassists marcus miller and as a sideman for other artists such as erykah badu , victor bailey , david bow\nGiven this information, extract information about frank westfall. [/INST]",
......
from typing import List
import pytest import pytest
import vllm import vllm
...@@ -10,7 +12,7 @@ MODEL_PATH = "baichuan-inc/Baichuan-7B" ...@@ -10,7 +12,7 @@ MODEL_PATH = "baichuan-inc/Baichuan-7B"
PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\n"\n##Instruction:\nconcert_singer contains tables such as stadium, singer, concert, singer_in_concert. Table stadium has columns such as Stadium_ID, Location, Name, Capacity, Highest, Lowest, Average. Stadium_ID is the primary key.\nTable singer has columns such as Singer_ID, Name, Country, Song_Name, Song_release_year, Age, Is_male. Singer_ID is the primary key.\nTable concert has columns such as concert_ID, concert_Name, Theme, Stadium_ID, Year. concert_ID is the primary key.\nTable singer_in_concert has columns such as concert_ID, Singer_ID. concert_ID is the primary key.\nThe Stadium_ID of concert is the foreign key of Stadium_ID of stadium.\nThe Singer_ID of singer_in_concert is the foreign key of Singer_ID of singer.\nThe concert_ID of singer_in_concert is the foreign key of concert_ID of concert.\n\n###Input:\n{query}\n\n###Response:""" # noqa: E501 PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\n"\n##Instruction:\nconcert_singer contains tables such as stadium, singer, concert, singer_in_concert. Table stadium has columns such as Stadium_ID, Location, Name, Capacity, Highest, Lowest, Average. Stadium_ID is the primary key.\nTable singer has columns such as Singer_ID, Name, Country, Song_Name, Song_release_year, Age, Is_male. Singer_ID is the primary key.\nTable concert has columns such as concert_ID, concert_Name, Theme, Stadium_ID, Year. concert_ID is the primary key.\nTable singer_in_concert has columns such as concert_ID, Singer_ID. 
concert_ID is the primary key.\nThe Stadium_ID of concert is the foreign key of Stadium_ID of stadium.\nThe Singer_ID of singer_in_concert is the foreign key of Singer_ID of singer.\nThe concert_ID of singer_in_concert is the foreign key of concert_ID of concert.\n\n###Input:\n{query}\n\n###Response:""" # noqa: E501
def do_sample(llm, lora_path: str, lora_id: int) -> str: def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
prompts = [ prompts = [
PROMPT_TEMPLATE.format(query="How many singers do we have?"), PROMPT_TEMPLATE.format(query="How many singers do we have?"),
PROMPT_TEMPLATE.format( PROMPT_TEMPLATE.format(
...@@ -30,7 +32,7 @@ def do_sample(llm, lora_path: str, lora_id: int) -> str: ...@@ -30,7 +32,7 @@ def do_sample(llm, lora_path: str, lora_id: int) -> str:
lora_request=LoRARequest(str(lora_id), lora_id, lora_path) lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
if lora_id else None) if lora_id else None)
# Print the outputs. # Print the outputs.
generated_texts = [] generated_texts: List[str] = []
for output in outputs: for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text.strip() generated_text = output.outputs[0].text.strip()
...@@ -62,7 +64,8 @@ def test_baichuan_lora(baichuan_lora_files): ...@@ -62,7 +64,8 @@ def test_baichuan_lora(baichuan_lora_files):
@pytest.mark.skip("Requires multiple GPUs") @pytest.mark.skip("Requires multiple GPUs")
def test_baichuan_tensor_parallel_equality(baichuan_lora_files): @pytest.mark.parametrize("fully_sharded", [True, False])
def test_baichuan_tensor_parallel_equality(baichuan_lora_files, fully_sharded):
# Cannot use as it will initialize torch.cuda too early... # Cannot use as it will initialize torch.cuda too early...
# if torch.cuda.device_count() < 4: # if torch.cuda.device_count() < 4:
# pytest.skip(f"Not enough GPUs for tensor parallelism {4}") # pytest.skip(f"Not enough GPUs for tensor parallelism {4}")
...@@ -73,7 +76,8 @@ def test_baichuan_tensor_parallel_equality(baichuan_lora_files): ...@@ -73,7 +76,8 @@ def test_baichuan_tensor_parallel_equality(baichuan_lora_files):
max_loras=4, max_loras=4,
max_lora_rank=64, max_lora_rank=64,
tensor_parallel_size=1, tensor_parallel_size=1,
trust_remote_code=True) trust_remote_code=True,
fully_sharded_loras=fully_sharded)
output_tp1 = do_sample(llm_tp1, baichuan_lora_files, lora_id=1) output_tp1 = do_sample(llm_tp1, baichuan_lora_files, lora_id=1)
del llm_tp1 del llm_tp1
...@@ -85,7 +89,8 @@ def test_baichuan_tensor_parallel_equality(baichuan_lora_files): ...@@ -85,7 +89,8 @@ def test_baichuan_tensor_parallel_equality(baichuan_lora_files):
max_loras=4, max_loras=4,
max_lora_rank=64, max_lora_rank=64,
tensor_parallel_size=2, tensor_parallel_size=2,
trust_remote_code=True) trust_remote_code=True,
fully_sharded_loras=fully_sharded)
output_tp2 = do_sample(llm_tp2, baichuan_lora_files, lora_id=2) output_tp2 = do_sample(llm_tp2, baichuan_lora_files, lora_id=2)
del llm_tp2 del llm_tp2
...@@ -99,10 +104,11 @@ def test_baichuan_tensor_parallel_equality(baichuan_lora_files): ...@@ -99,10 +104,11 @@ def test_baichuan_tensor_parallel_equality(baichuan_lora_files):
max_loras=4, max_loras=4,
max_lora_rank=64, max_lora_rank=64,
tensor_parallel_size=4, tensor_parallel_size=4,
trust_remote_code=True) trust_remote_code=True,
fully_sharded_loras=fully_sharded)
output_tp4 = do_sample(llm_tp4, baichuan_lora_files, lora_id=2) output_tp4 = do_sample(llm_tp4, baichuan_lora_files, lora_id=2)
del llm_tp4 del llm_tp4
cleanup() cleanup()
assert output_tp1 == output_tp4 assert output_tp1 == output_tp4
\ No newline at end of file
from typing import List
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
...@@ -6,7 +8,7 @@ MODEL_PATH = "THUDM/chatglm3-6b" ...@@ -6,7 +8,7 @@ MODEL_PATH = "THUDM/chatglm3-6b"
PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\n"\n##Instruction:\nconcert_singer contains tables such as stadium, singer, concert, singer_in_concert. Table stadium has columns such as Stadium_ID, Location, Name, Capacity, Highest, Lowest, Average. Stadium_ID is the primary key.\nTable singer has columns such as Singer_ID, Name, Country, Song_Name, Song_release_year, Age, Is_male. Singer_ID is the primary key.\nTable concert has columns such as concert_ID, concert_Name, Theme, Stadium_ID, Year. concert_ID is the primary key.\nTable singer_in_concert has columns such as concert_ID, Singer_ID. concert_ID is the primary key.\nThe Stadium_ID of concert is the foreign key of Stadium_ID of stadium.\nThe Singer_ID of singer_in_concert is the foreign key of Singer_ID of singer.\nThe concert_ID of singer_in_concert is the foreign key of concert_ID of concert.\n\n###Input:\n{query}\n\n###Response:""" # noqa: E501 PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\n"\n##Instruction:\nconcert_singer contains tables such as stadium, singer, concert, singer_in_concert. Table stadium has columns such as Stadium_ID, Location, Name, Capacity, Highest, Lowest, Average. Stadium_ID is the primary key.\nTable singer has columns such as Singer_ID, Name, Country, Song_Name, Song_release_year, Age, Is_male. Singer_ID is the primary key.\nTable concert has columns such as concert_ID, concert_Name, Theme, Stadium_ID, Year. concert_ID is the primary key.\nTable singer_in_concert has columns such as concert_ID, Singer_ID. 
concert_ID is the primary key.\nThe Stadium_ID of concert is the foreign key of Stadium_ID of stadium.\nThe Singer_ID of singer_in_concert is the foreign key of Singer_ID of singer.\nThe concert_ID of singer_in_concert is the foreign key of concert_ID of concert.\n\n###Input:\n{query}\n\n###Response:""" # noqa: E501
def do_sample(llm, lora_path: str, lora_id: int) -> str: def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
prompts = [ prompts = [
PROMPT_TEMPLATE.format(query="How many singers do we have?"), PROMPT_TEMPLATE.format(query="How many singers do we have?"),
PROMPT_TEMPLATE.format( PROMPT_TEMPLATE.format(
...@@ -26,7 +28,7 @@ def do_sample(llm, lora_path: str, lora_id: int) -> str: ...@@ -26,7 +28,7 @@ def do_sample(llm, lora_path: str, lora_id: int) -> str:
lora_request=LoRARequest(str(lora_id), lora_id, lora_path) lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
if lora_id else None) if lora_id else None)
# Print the outputs. # Print the outputs.
generated_texts = [] generated_texts: List[str] = []
for output in outputs: for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text.strip() generated_text = output.outputs[0].text.strip()
......
from typing import List
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
MODEL_PATH = "google/gemma-7b" MODEL_PATH = "google/gemma-7b"
def do_sample(llm, lora_path: str, lora_id: int) -> str: def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
prompts = [ prompts = [
"Quote: Imagination is", "Quote: Imagination is",
"Quote: Be yourself;", "Quote: Be yourself;",
...@@ -17,7 +19,7 @@ def do_sample(llm, lora_path: str, lora_id: int) -> str: ...@@ -17,7 +19,7 @@ def do_sample(llm, lora_path: str, lora_id: int) -> str:
lora_request=LoRARequest(str(lora_id), lora_id, lora_path) lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
if lora_id else None) if lora_id else None)
# Print the outputs. # Print the outputs.
generated_texts = [] generated_texts: List[str] = []
for output in outputs: for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text.strip() generated_text = output.outputs[0].text.strip()
......
...@@ -26,7 +26,7 @@ def get_lora_model(model_id: str, target_modules: List[str], rank: int): ...@@ -26,7 +26,7 @@ def get_lora_model(model_id: str, target_modules: List[str], rank: int):
return lora_model return lora_model
def do_sample(llm, def do_sample(llm: vllm.LLM,
lora_path: Optional[str] = None, lora_path: Optional[str] = None,
lora_id: Optional[int] = None, lora_id: Optional[int] = None,
logprobs: int = 0, logprobs: int = 0,
...@@ -42,8 +42,8 @@ def do_sample(llm, ...@@ -42,8 +42,8 @@ def do_sample(llm,
lora_request=LoRARequest(str(lora_id), lora_id, lora_path) lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
if lora_id else None) if lora_id else None)
# Print the outputs. # Print the outputs.
generated_texts = [] generated_texts: List[str] = []
generated_logprobs = [] generated_logprobs: List[List[List[int]]] = []
for output in outputs: for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text generated_text = output.outputs[0].text
......
...@@ -12,7 +12,8 @@ from vllm.config import LoRAConfig ...@@ -12,7 +12,8 @@ from vllm.config import LoRAConfig
from vllm.lora.fully_sharded_layers import ( from vllm.lora.fully_sharded_layers import (
ColumnParallelLinearWithShardedLoRA, ColumnParallelLinearWithShardedLoRA,
MergedColumnParallelLinearWithShardedLoRA, MergedColumnParallelLinearWithShardedLoRA,
MergedQKVParallelLinearWithShardedLora, RowParallelLinearWithShardedLoRA) MergedQKVParallelLinearWithShardedLora, QKVParallelLinearWithShardedLora,
RowParallelLinearWithShardedLoRA)
# yapf conflicts with isort for this block # yapf conflicts with isort for this block
# yapf: disable # yapf: disable
from vllm.lora.layers import (BaseLayerWithLoRA, ColumnParallelLinearWithLoRA, from vllm.lora.layers import (BaseLayerWithLoRA, ColumnParallelLinearWithLoRA,
...@@ -109,7 +110,7 @@ def populate_loras( ...@@ -109,7 +110,7 @@ def populate_loras(
for slot_idx, lora_id in enumerate(id_to_index): for slot_idx, lora_id in enumerate(id_to_index):
if lora_id is not None: if lora_id is not None:
subloras = [] subloras: List[LoRALayerWeights] = []
sublora_len = layer_weights.shape[0] // repeats sublora_len = layer_weights.shape[0] // repeats
for i in range(repeats): for i in range(repeats):
sublora = DummyLoRAManager().init_random_lora( sublora = DummyLoRAManager().init_random_lora(
...@@ -158,7 +159,10 @@ def create_random_inputs( ...@@ -158,7 +159,10 @@ def create_random_inputs(
low, high = input_range low, high = input_range
inputs, index_mapping, prompt_mapping = [], [], [] inputs: List[torch.Tensor] = []
index_mapping: List[int] = []
prompt_mapping: List[int] = []
for _ in range(num_inputs): for _ in range(num_inputs):
if input_type == torch.int: if input_type == torch.int:
inputs.append( inputs.append(
...@@ -222,7 +226,7 @@ def test_embeddings(dist_init, num_loras, device, vocab_size) -> None: ...@@ -222,7 +226,7 @@ def test_embeddings(dist_init, num_loras, device, vocab_size) -> None:
lora_result = lora_embedding(torch.cat(inputs)) lora_result = lora_embedding(torch.cat(inputs))
expected_results = [] expected_results: List[torch.Tensor] = []
for input_, lora_id in zip(inputs, prompt_mapping): for input_, lora_id in zip(inputs, prompt_mapping):
lora = lora_dict[lora_id] lora = lora_dict[lora_id]
result = embedding(input_) result = embedding(input_)
...@@ -356,7 +360,7 @@ def test_embeddings_with_new_embeddings(dist_init, num_loras, device, ...@@ -356,7 +360,7 @@ def test_embeddings_with_new_embeddings(dist_init, num_loras, device,
lora_result = lora_embedding(torch.cat(original_inputs)) lora_result = lora_embedding(torch.cat(original_inputs))
expected_results = [] expected_results: List[torch.Tensor] = []
for input_, original_input_, lora_id in zip(inputs, original_inputs, for input_, original_input_, lora_id in zip(inputs, original_inputs,
prompt_mapping): prompt_mapping):
lora = lora_dict[lora_id] lora = lora_dict[lora_id]
...@@ -471,10 +475,10 @@ def test_lm_head_logits_processor(dist_init, num_loras, device, ...@@ -471,10 +475,10 @@ def test_lm_head_logits_processor(dist_init, num_loras, device,
lora_result = lora_logits_processor._get_logits( lora_result = lora_logits_processor._get_logits(
hidden_states=torch.cat(inputs), hidden_states=torch.cat(inputs),
embedding=linear.weight, lm_head=linear,
embedding_bias=None) embedding_bias=None)
original_weight = linear.weight.clone() original_lm_head = deepcopy(linear)
linear.weight[logits_processor. linear.weight[logits_processor.
org_vocab_size:logits_processor.org_vocab_size + org_vocab_size:logits_processor.org_vocab_size +
...@@ -482,11 +486,11 @@ def test_lm_head_logits_processor(dist_init, num_loras, device, ...@@ -482,11 +486,11 @@ def test_lm_head_logits_processor(dist_init, num_loras, device,
logits_processor.org_vocab_size = (vocab_size + logits_processor.org_vocab_size = (vocab_size +
lora_config.lora_extra_vocab_size) lora_config.lora_extra_vocab_size)
expected_results = [] expected_results: List[torch.Tensor] = []
for input_, lora_id in zip(inputs, prompt_mapping): for input_, lora_id in zip(inputs, prompt_mapping):
lora = lora_dict[lora_id] lora = lora_dict[lora_id]
result = logits_processor._get_logits(hidden_states=input_, result = logits_processor._get_logits(hidden_states=input_,
embedding=linear.weight, lm_head=linear,
embedding_bias=None) embedding_bias=None)
result[:, vocab_size + embeddings_tensor_len:] = float("-inf") result[:, vocab_size + embeddings_tensor_len:] = float("-inf")
result += input_ @ lora.lora_a @ lora.lora_b * lora.scaling result += input_ @ lora.lora_a @ lora.lora_b * lora.scaling
...@@ -515,11 +519,11 @@ def test_lm_head_logits_processor(dist_init, num_loras, device, ...@@ -515,11 +519,11 @@ def test_lm_head_logits_processor(dist_init, num_loras, device,
lora_result = lora_logits_processor._get_logits( lora_result = lora_logits_processor._get_logits(
hidden_states=torch.cat(inputs), hidden_states=torch.cat(inputs),
embedding=original_weight, lm_head=original_lm_head,
embedding_bias=None)[:, :vocab_size] embedding_bias=None)[:, :vocab_size]
expected_result = logits_processor._get_logits( expected_result = logits_processor._get_logits(
hidden_states=torch.cat(inputs), hidden_states=torch.cat(inputs),
embedding=original_weight, lm_head=original_lm_head,
embedding_bias=None) embedding_bias=None)
rtol, atol = TOLERANCES[lora_result.dtype] rtol, atol = TOLERANCES[lora_result.dtype]
...@@ -598,7 +602,7 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard, ...@@ -598,7 +602,7 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
lora_result = lora_linear(torch.cat(inputs))[0] lora_result = lora_linear(torch.cat(inputs))[0]
expected_results = [] expected_results: List[torch.Tensor] = []
for input_, lora_id in zip(inputs, prompt_mapping): for input_, lora_id in zip(inputs, prompt_mapping):
lora = lora_dict[lora_id] lora = lora_dict[lora_id]
result = linear(input_)[0] result = linear(input_)[0]
...@@ -681,7 +685,9 @@ def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard, ...@@ -681,7 +685,9 @@ def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
bias=False, bias=False,
params_dtype=torch.float16) params_dtype=torch.float16)
linear.weight.data = torch.rand_like(linear.weight.data) linear.weight.data = torch.rand_like(linear.weight.data)
lora_linear = QKVParallelLinearWithLora(linear) lora_linear = QKVParallelLinearWithLora(
linear
) if not fully_shard else QKVParallelLinearWithShardedLora(linear)
@dataclass @dataclass
class FakeConfig: class FakeConfig:
...@@ -729,7 +735,7 @@ def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard, ...@@ -729,7 +735,7 @@ def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
lora_result = lora_linear(torch.cat(inputs))[0] lora_result = lora_linear(torch.cat(inputs))[0]
expected_results = [] expected_results: List[torch.Tensor] = []
for input_, lora_id in zip(inputs, prompt_mapping): for input_, lora_id in zip(inputs, prompt_mapping):
result = linear(input_)[0] result = linear(input_)[0]
subloras = sublora_dict[lora_id] subloras = sublora_dict[lora_id]
...@@ -885,9 +891,9 @@ def test_vocab_parallel_embedding_indices(tp_size, seed): ...@@ -885,9 +891,9 @@ def test_vocab_parallel_embedding_indices(tp_size, seed):
computed_added_vocab_size = 0 computed_added_vocab_size = 0
vocab_size_padded = -1 vocab_size_padded = -1
all_org_tokens = [] all_org_tokens: List[int] = []
all_added_tokens = [] all_added_tokens: List[int] = []
token_ids = [] token_ids: List[int] = []
for tp_rank in range(tp_size): for tp_rank in range(tp_size):
with patch( with patch(
......
from typing import List
import pytest import pytest
import ray import ray
...@@ -9,7 +11,7 @@ from .conftest import cleanup ...@@ -9,7 +11,7 @@ from .conftest import cleanup
MODEL_PATH = "meta-llama/Llama-2-7b-hf" MODEL_PATH = "meta-llama/Llama-2-7b-hf"
def do_sample(llm, lora_path: str, lora_id: int): def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
prompts = [ prompts = [
"[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_74 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]", # noqa: E501 "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_74 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]", # noqa: E501
"[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? [/user] [assistant]", # noqa: E501 "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? [/user] [assistant]", # noqa: E501
...@@ -27,7 +29,7 @@ def do_sample(llm, lora_path: str, lora_id: int): ...@@ -27,7 +29,7 @@ def do_sample(llm, lora_path: str, lora_id: int):
lora_request=LoRARequest(str(lora_id), lora_id, lora_path) lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
if lora_id else None) if lora_id else None)
# Print the outputs. # Print the outputs.
generated_texts = [] generated_texts: List[str] = []
for output in outputs: for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text generated_text = output.outputs[0].text
......
...@@ -77,7 +77,7 @@ def evaluate_json_response(model_response, golden_response): ...@@ -77,7 +77,7 @@ def evaluate_json_response(model_response, golden_response):
def generate( def generate(
llm, llm: vllm.LLM,
inputs: Tuple[str, SamplingParams, Optional[LoRARequest]], inputs: Tuple[str, SamplingParams, Optional[LoRARequest]],
): ):
prompts, sampling_param, lora_request = inputs prompts, sampling_param, lora_request = inputs
...@@ -92,11 +92,10 @@ def batched_generate( ...@@ -92,11 +92,10 @@ def batched_generate(
for input in inputs: for input in inputs:
prompt, sampling_param, lora_req = input prompt, sampling_param, lora_req = input
# Add requests to the engine and run the engine # Add requests to the engine and run the engine
llm._validate_and_add_requests( llm._validate_and_add_requests(prompt,
prompt, sampling_param,
sampling_param, lora_request=lora_req,
lora_request=lora_req, prompt_adapter_request=None)
)
outputs = llm._run_engine(use_tqdm=True) outputs = llm._run_engine(use_tqdm=True)
return [outputs[i].outputs[0].text.strip() for i in range(len(outputs))] return [outputs[i].outputs[0].text.strip() for i in range(len(outputs))]
...@@ -159,7 +158,7 @@ def test_batched_rope_kernel(lora_llm, long_context_infos): ...@@ -159,7 +158,7 @@ def test_batched_rope_kernel(lora_llm, long_context_infos):
non-batched generation. non-batched generation.
""" """
# Create non batched results first to compare against batched results # Create non batched results first to compare against batched results
non_batched_results = [] non_batched_results: List[str] = []
for lora_id, info in long_context_infos.items(): for lora_id, info in long_context_infos.items():
context_len = info["context_length"] context_len = info["context_length"]
...@@ -172,7 +171,8 @@ def test_batched_rope_kernel(lora_llm, long_context_infos): ...@@ -172,7 +171,8 @@ def test_batched_rope_kernel(lora_llm, long_context_infos):
# Create batched results # Create batched results
# Each element of the batch must be # Each element of the batch must be
# (prompt, prompt_sampling_params, prompt_lora_request) # (prompt, prompt_sampling_params, prompt_lora_request)
batched_prompts = [] batched_prompts: List[Tuple[str, SamplingParams,
Optional[LoRARequest]]] = []
for lora_id, info in long_context_infos.items(): for lora_id, info in long_context_infos.items():
context_len = info["context_length"] context_len = info["context_length"]
batched_prompts.extend([ batched_prompts.extend([
...@@ -196,7 +196,8 @@ def test_self_consistency(lora_llm, long_context_infos): ...@@ -196,7 +196,8 @@ def test_self_consistency(lora_llm, long_context_infos):
num_loras = len(long_context_infos) num_loras = len(long_context_infos)
# Create results in order of long_context_infos # Create results in order of long_context_infos
batched_prompts = [] batched_prompts: List[Tuple[str, SamplingParams,
Optional[LoRARequest]]] = []
for lora_id, info in long_context_infos.items(): for lora_id, info in long_context_infos.items():
context_len = info["context_length"] context_len = info["context_length"]
batched_prompts.extend([ batched_prompts.extend([
...@@ -244,7 +245,7 @@ def test_quality(lora_llm, long_context_infos): ...@@ -244,7 +245,7 @@ def test_quality(lora_llm, long_context_infos):
The test is expected to run for about 1 minute on a p4de.24xlarge The test is expected to run for about 1 minute on a p4de.24xlarge
instance. instance.
""" """
scores = [] scores: List[float] = []
for lora_id, info in long_context_infos.items(): for lora_id, info in long_context_infos.items():
context_len = info["context_length"] context_len = info["context_length"]
for prompt_and_response in prompts_and_responses[context_len]: for prompt_and_response in prompts_and_responses[context_len]:
...@@ -277,7 +278,8 @@ def test_max_len(lora_llm, long_context_infos): ...@@ -277,7 +278,8 @@ def test_max_len(lora_llm, long_context_infos):
generate(lora_llm, (bad_prompt, sampling_params, lora_request)) generate(lora_llm, (bad_prompt, sampling_params, lora_request))
# Also test batched # Also test batched
batched_prompts = [] batched_prompts: List[Tuple[str, SamplingParams,
Optional[LoRARequest]]] = []
for lora_id_with_bad_inputs in long_context_infos: for lora_id_with_bad_inputs in long_context_infos:
for lora_id, info in long_context_infos.items(): for lora_id, info in long_context_infos.items():
context_len = info["context_length"] context_len = info["context_length"]
......
from typing import List
import pytest import pytest
from vllm.lora.models import LoRAModel from vllm.lora.models import LoRAModel
...@@ -17,7 +19,7 @@ def test_load_checkpoints( ...@@ -17,7 +19,7 @@ def test_load_checkpoints(
packed_modules_mapping = BaiChuanBaseForCausalLM.packed_modules_mapping packed_modules_mapping = BaiChuanBaseForCausalLM.packed_modules_mapping
embedding_modules = BaiChuanBaseForCausalLM.embedding_modules embedding_modules = BaiChuanBaseForCausalLM.embedding_modules
embed_padding_modules = BaiChuanBaseForCausalLM.embedding_padding_modules embed_padding_modules = BaiChuanBaseForCausalLM.embedding_padding_modules
expected_lora_modules = [] expected_lora_modules: List[str] = []
for module in supported_lora_modules: for module in supported_lora_modules:
if module in packed_modules_mapping: if module in packed_modules_mapping:
expected_lora_modules.extend(packed_modules_mapping[module]) expected_lora_modules.extend(packed_modules_mapping[module])
......
import os import os
from typing import List from typing import Dict, List
import pytest import pytest
import torch import torch
...@@ -62,7 +62,7 @@ def test_from_lora_tensors(sql_lora_files): ...@@ -62,7 +62,7 @@ def test_from_lora_tensors(sql_lora_files):
def create_lora(lora_id: int, model: nn.Module, def create_lora(lora_id: int, model: nn.Module,
sub_modules: List[str]) -> LoRAModel: sub_modules: List[str]) -> LoRAModel:
loras = {} loras: Dict[str, LoRALayerWeights] = {}
for name in sub_modules: for name in sub_modules:
w = model.get_submodule(name).weight w = model.get_submodule(name).weight
loras[name] = LoRALayerWeights( loras[name] = LoRALayerWeights(
...@@ -83,7 +83,7 @@ def create_packed_lora( ...@@ -83,7 +83,7 @@ def create_packed_lora(
empty_replaced_module_name=None, empty_replaced_module_name=None,
) -> LoRAModel: ) -> LoRAModel:
w = model.get_submodule(module_name).weight w = model.get_submodule(module_name).weight
loras = {} loras: Dict[str, LoRALayerWeights] = {}
for replaced_module_name in replaced_module_names: for replaced_module_name in replaced_module_names:
if replaced_module_name == empty_replaced_module_name: if replaced_module_name == empty_replaced_module_name:
continue continue
...@@ -127,37 +127,37 @@ def test_lora_model_manager(dist_init, dummy_model): ...@@ -127,37 +127,37 @@ def test_lora_model_manager(dist_init, dummy_model):
model, 2, 2, 2, model, 2, 2, 2,
LoRAConfig(max_lora_rank=8, max_cpu_loras=3, max_loras=2)) LoRAConfig(max_lora_rank=8, max_cpu_loras=3, max_loras=2))
assert all(x is None for x in manager.lora_index_to_id) assert all(x is None for x in manager.lora_index_to_id)
assert manager.add_lora(model_lora1) assert manager.add_adapter(model_lora1)
assert manager.activate_lora(1) assert manager.activate_adapter(1)
assert manager.lora_index_to_id[0] == 1 assert manager.lora_index_to_id[0] == 1
assert not manager.add_lora(model_lora1) assert not manager.add_adapter(model_lora1)
assert not manager.activate_lora(1) assert not manager.activate_adapter(1)
assert manager.add_lora(model_lora2) assert manager.add_adapter(model_lora2)
assert manager.activate_lora(2) assert manager.activate_adapter(2)
assert manager.lora_index_to_id[0] == 1 assert manager.lora_index_to_id[0] == 1
assert manager.lora_index_to_id[1] == 2 assert manager.lora_index_to_id[1] == 2
assert not manager.add_lora(model_lora2) assert not manager.add_adapter(model_lora2)
assert not manager.activate_lora(2) assert not manager.activate_adapter(2)
assert manager.add_lora(model_lora3) assert manager.add_adapter(model_lora3)
assert manager.lora_index_to_id[0] == 1 assert manager.lora_index_to_id[0] == 1
assert manager.lora_index_to_id[1] == 2 assert manager.lora_index_to_id[1] == 2
with pytest.raises(ValueError): with pytest.raises(ValueError):
assert manager.activate_lora(3) assert manager.activate_adapter(3)
assert manager.lora_index_to_id[0] == 1 assert manager.lora_index_to_id[0] == 1
assert manager.lora_index_to_id[1] == 2 assert manager.lora_index_to_id[1] == 2
assert manager.remove_lora(model_lora2.id) assert manager.remove_adapter(model_lora2.id)
assert manager.lora_index_to_id[1] is None assert manager.lora_index_to_id[1] is None
assert not manager.remove_lora(model_lora2.id) assert not manager.remove_adapter(model_lora2.id)
assert manager.remove_lora(model_lora1.id) assert manager.remove_adapter(model_lora1.id)
assert not manager.remove_lora(model_lora1.id) assert not manager.remove_adapter(model_lora1.id)
assert manager.add_lora(model_lora1) assert manager.add_adapter(model_lora1)
assert manager.lora_index_to_id[0] is None assert manager.lora_index_to_id[0] is None
assert manager.lora_index_to_id[1] is None assert manager.lora_index_to_id[1] is None
assert manager.add_lora(model_lora2) assert manager.add_adapter(model_lora2)
assert manager.activate_lora(3) assert manager.activate_adapter(3)
assert manager.lora_index_to_id[0] == 3 assert manager.lora_index_to_id[0] == 3
assert manager.lora_index_to_id[1] is None assert manager.lora_index_to_id[1] is None
assert manager.activate_lora(2) assert manager.activate_adapter(2)
assert manager.lora_index_to_id[0] == 3 assert manager.lora_index_to_id[0] == 3
assert manager.lora_index_to_id[1] == 2 assert manager.lora_index_to_id[1] == 2
...@@ -173,42 +173,70 @@ def test_lora_lru_cache_model_manager(dist_init, dummy_model): ...@@ -173,42 +173,70 @@ def test_lora_lru_cache_model_manager(dist_init, dummy_model):
model, 2, 2, 2, model, 2, 2, 2,
LoRAConfig(max_lora_rank=8, max_cpu_loras=3, max_loras=2)) LoRAConfig(max_lora_rank=8, max_cpu_loras=3, max_loras=2))
assert all(x is None for x in manager.lora_index_to_id) assert all(x is None for x in manager.lora_index_to_id)
assert manager.add_lora(model_lora1) assert manager.add_adapter(model_lora1)
assert manager.activate_lora(1) assert manager.activate_adapter(1)
assert manager.lora_index_to_id[0] == 1 assert manager.lora_index_to_id[0] == 1
assert not manager.add_lora(model_lora1) assert not manager.add_adapter(model_lora1)
assert not manager.activate_lora(1) assert not manager.activate_adapter(1)
assert manager.add_lora(model_lora2) assert manager.add_adapter(model_lora2)
assert manager.activate_lora(2) assert manager.activate_adapter(2)
assert manager.lora_index_to_id[0] == 1 assert manager.lora_index_to_id[0] == 1
assert manager.lora_index_to_id[1] == 2 assert manager.lora_index_to_id[1] == 2
assert not manager.add_lora(model_lora2) assert not manager.add_adapter(model_lora2)
assert not manager.activate_lora(2) assert not manager.activate_adapter(2)
assert manager.add_lora(model_lora3) assert manager.add_adapter(model_lora3)
assert manager.lora_index_to_id[0] == 1 assert manager.lora_index_to_id[0] == 1
assert manager.lora_index_to_id[1] == 2 assert manager.lora_index_to_id[1] == 2
assert manager.activate_lora(3) assert manager.activate_adapter(3)
assert manager.lora_index_to_id[0] == 3 assert manager.lora_index_to_id[0] == 3
assert manager.lora_index_to_id[1] == 2 assert manager.lora_index_to_id[1] == 2
assert manager.remove_lora(model_lora2.id) assert manager.remove_adapter(model_lora2.id)
assert manager.lora_index_to_id[1] is None assert manager.lora_index_to_id[1] is None
assert not manager.remove_lora(model_lora2.id) assert not manager.remove_adapter(model_lora2.id)
assert manager.remove_lora(model_lora1.id) assert manager.remove_adapter(model_lora1.id)
assert not manager.remove_lora(model_lora1.id) assert not manager.remove_adapter(model_lora1.id)
assert manager.add_lora(model_lora1) assert manager.add_adapter(model_lora1)
assert manager.activate_lora(1) assert manager.activate_adapter(1)
assert manager.lora_index_to_id[0] == 3 assert manager.lora_index_to_id[0] == 3
assert manager.lora_index_to_id[1] == 1 assert manager.lora_index_to_id[1] == 1
assert manager.add_lora(model_lora2) assert manager.add_adapter(model_lora2)
assert manager.deactivate_lora(3) assert manager.deactivate_adapter(3)
assert manager.lora_index_to_id[0] is None assert manager.lora_index_to_id[0] is None
assert manager.lora_index_to_id[1] == 1 assert manager.lora_index_to_id[1] == 1
assert manager.activate_lora(2) assert manager.activate_adapter(2)
assert manager.lora_index_to_id[0] == 2 assert manager.lora_index_to_id[0] == 2
assert manager.lora_index_to_id[1] == 1 assert manager.lora_index_to_id[1] == 1
assert manager.activate_lora(3) assert manager.activate_adapter(3)
assert manager.lora_index_to_id[0] == 2 assert manager.lora_index_to_id[0] == 2
assert manager.lora_index_to_id[1] == 3 assert manager.lora_index_to_id[1] == 3
assert manager.pin_adapter(2)
assert manager.lora_index_to_id[0] == 2
assert manager.lora_index_to_id[1] == 3
assert manager.activate_adapter(1)
assert manager.lora_index_to_id[0] == 2
assert manager.lora_index_to_id[1] == 1
assert manager.deactivate_adapter(2)
assert manager.lora_index_to_id[0] is None
assert manager.lora_index_to_id[1] == 1
assert manager.activate_adapter(3)
assert manager.lora_index_to_id[0] == 3
assert manager.lora_index_to_id[1] == 1
assert manager.pin_adapter(3)
assert manager.pin_adapter(1)
with pytest.raises(RuntimeError):
assert manager.pin_adapter(2)
assert manager.lora_index_to_id[0] == 3
assert manager.lora_index_to_id[1] == 1
with pytest.raises(RuntimeError):
assert manager.activate_adapter(2)
assert manager.deactivate_adapter(3)
assert manager.pin_adapter(2)
assert manager.lora_index_to_id[0] == 2
assert manager.lora_index_to_id[1] == 1
assert manager.remove_adapter(3)
with pytest.raises(ValueError):
assert manager.pin_adapter(3)
def test_lru_lora_model_manager(dist_init, dummy_model): def test_lru_lora_model_manager(dist_init, dummy_model):
...@@ -228,132 +256,169 @@ def test_lru_lora_model_manager(dist_init, dummy_model): ...@@ -228,132 +256,169 @@ def test_lru_lora_model_manager(dist_init, dummy_model):
assert all(x is None for x in manager.lora_index_to_id) assert all(x is None for x in manager.lora_index_to_id)
# Add up to capacity # Add up to capacity
assert manager.add_lora(model_lora1) assert manager.add_adapter(model_lora1)
assert manager.add_lora(model_lora2) assert manager.add_adapter(model_lora2)
assert manager.activate_lora(1) assert manager.activate_adapter(1)
assert manager.activate_lora(2) assert manager.activate_adapter(2)
assert set(manager.list_loras()) == {1, 2} assert set(manager.list_adapters()) == {1, 2}
assert manager.lora_index_to_id[0] == 1 assert manager.lora_index_to_id[0] == 1
assert manager.lora_index_to_id[1] == 2 assert manager.lora_index_to_id[1] == 2
# Add over capacity # Add over capacity
assert manager.add_lora(model_lora3) assert manager.add_adapter(model_lora3)
assert manager.add_lora(model_lora4) assert manager.add_adapter(model_lora4)
assert manager.activate_lora(3) assert manager.activate_adapter(3)
assert manager.activate_lora(4) assert manager.activate_adapter(4)
assert set(manager.list_loras()) == {3, 4} assert set(manager.list_adapters()) == {3, 4}
assert manager.lora_index_to_id[0] == 3 assert manager.lora_index_to_id[0] == 3
assert manager.lora_index_to_id[1] == 4 assert manager.lora_index_to_id[1] == 4
# Add 3 again to move it to the top and then add 2 # Add 3 again to move it to the top and then add 2
# should return false since it's in already # should return false since it's in already
assert not manager.add_lora(model_lora3) assert not manager.add_adapter(model_lora3)
assert not manager.activate_lora(3) assert not manager.activate_adapter(3)
assert manager.add_lora(model_lora2) assert manager.add_adapter(model_lora2)
assert manager.activate_lora(2) assert manager.activate_adapter(2)
assert set(manager.list_loras()) == {3, 2} assert set(manager.list_adapters()) == {3, 2}
assert manager.lora_index_to_id[0] == 3 assert manager.lora_index_to_id[0] == 3
assert manager.lora_index_to_id[1] == 2 assert manager.lora_index_to_id[1] == 2
# Remove manually # Remove manually
assert manager.remove_lora(3) assert manager.remove_adapter(3)
assert not manager.remove_lora(3) assert not manager.remove_adapter(3)
assert set(manager.list_loras()) == {2} assert set(manager.list_adapters()) == {2}
assert manager.lora_index_to_id[0] is None assert manager.lora_index_to_id[0] is None
assert manager.lora_index_to_id[1] == 2 assert manager.lora_index_to_id[1] == 2
assert manager.add_lora(model_lora3) assert manager.add_adapter(model_lora3)
assert manager.activate_lora(3) assert manager.activate_adapter(3)
assert manager.add_lora(model_lora4) assert manager.add_adapter(model_lora4)
assert manager.activate_lora(4) assert manager.activate_adapter(4)
assert set(manager.list_loras()) == {3, 4} assert set(manager.list_adapters()) == {3, 4}
assert manager.lora_index_to_id[0] == 3 assert manager.lora_index_to_id[0] == 3
assert manager.lora_index_to_id[1] == 4 assert manager.lora_index_to_id[1] == 4
assert manager.remove_oldest_lora() assert manager.remove_oldest_adapter()
assert set(manager.list_loras()) == {4} assert set(manager.list_adapters()) == {4}
assert manager.lora_index_to_id[0] is None assert manager.lora_index_to_id[0] is None
assert manager.lora_index_to_id[1] == 4 assert manager.lora_index_to_id[1] == 4
assert manager.remove_oldest_lora() assert manager.remove_oldest_adapter()
assert set(manager.list_loras()) == set() assert set(manager.list_adapters()) == set()
assert all(x is None for x in manager.lora_index_to_id) assert all(x is None for x in manager.lora_index_to_id)
assert not manager.remove_oldest_lora() assert not manager.remove_oldest_adapter()
assert set(manager.list_loras()) == set() assert set(manager.list_adapters()) == set()
assert all(x is None for x in manager.lora_index_to_id) assert all(x is None for x in manager.lora_index_to_id)
# pinning
assert manager.add_adapter(model_lora3)
assert manager.activate_adapter(3)
assert manager.add_adapter(model_lora4)
assert manager.activate_adapter(4)
assert set(manager.list_adapters()) == {3, 4}
with pytest.raises(ValueError):
assert manager.pin_adapter(1)
assert manager.pin_adapter(3)
# Remove manually
assert manager.remove_adapter(3)
assert not manager.remove_adapter(3)
assert set(manager.list_adapters()) == {4}
assert manager.lora_index_to_id[0] is None
assert manager.lora_index_to_id[1] == 4
assert manager.add_adapter(model_lora1)
assert manager.pin_adapter(1)
assert manager.add_adapter(model_lora2)
assert manager.activate_adapter(2)
assert set(manager.list_adapters()) == {1, 2}
assert manager.lora_index_to_id[0] == 1
assert manager.lora_index_to_id[1] == 2
assert manager.remove_oldest_adapter()
assert set(manager.list_adapters()) == {1}
assert manager.lora_index_to_id[0] == 1
assert manager.lora_index_to_id[1] is None
with pytest.raises(RuntimeError):
assert manager.remove_oldest_adapter()
assert set(manager.list_adapters()) == {1}
def test_lru_cache_worker_lora_manager(llama_2_7b_model_extra_embeddings, def test_lru_cache_worker_adapter_manager(llama_2_7b_model_extra_embeddings,
sql_lora_files): sql_lora_files):
lora_config = LoRAConfig(max_lora_rank=8, max_cpu_loras=4, max_loras=4) lora_config = LoRAConfig(max_lora_rank=8, max_cpu_loras=4, max_loras=4)
worker_lora_manager = LRUCacheWorkerLoRAManager( worker_adapter_manager = LRUCacheWorkerLoRAManager(
4, 2, llama_2_7b_model_extra_embeddings.unpadded_vocab_size - 4, 2, llama_2_7b_model_extra_embeddings.unpadded_vocab_size -
lora_config.lora_extra_vocab_size, lora_config, torch.device("cuda"), lora_config.lora_extra_vocab_size, lora_config, torch.device("cuda"),
EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES) EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES)
worker_lora_manager.create_lora_manager(llama_2_7b_model_extra_embeddings) worker_adapter_manager.create_lora_manager(
llama_2_7b_model_extra_embeddings)
mapping = LoRAMapping([], []) mapping = LoRAMapping([], [])
worker_lora_manager.set_active_loras([ worker_adapter_manager.set_active_adapters([
LoRARequest("1", 1, sql_lora_files), LoRARequest("1", 1, sql_lora_files),
LoRARequest("2", 2, sql_lora_files) LoRARequest("2", 2, sql_lora_files)
], mapping) ], mapping)
assert worker_lora_manager.list_loras() == {1, 2} assert worker_adapter_manager.list_adapters() == {1, 2}
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1 assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 2 assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 2
worker_lora_manager.set_active_loras([ worker_adapter_manager.set_active_adapters([
LoRARequest("1", 1, sql_lora_files), LoRARequest("1", 1, sql_lora_files),
LoRARequest("3", 3, sql_lora_files), LoRARequest("3", 3, sql_lora_files),
LoRARequest("4", 4, sql_lora_files) LoRARequest("4", 4, sql_lora_files)
], mapping) ], mapping)
assert worker_lora_manager.list_loras() == {1, 2, 3, 4} assert worker_adapter_manager.list_adapters() == {1, 2, 3, 4}
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1 assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 2 assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 2
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 3 assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 3
assert worker_lora_manager._lora_manager.lora_index_to_id[3] == 4 assert worker_adapter_manager._adapter_manager.lora_index_to_id[3] == 4
worker_lora_manager.set_active_loras([ worker_adapter_manager.set_active_adapters([
LoRARequest("1", 1, sql_lora_files), LoRARequest("1", 1, sql_lora_files),
LoRARequest("2", 2, sql_lora_files), LoRARequest("2", 2, sql_lora_files),
LoRARequest("5", 5, sql_lora_files) LoRARequest("5", 5, sql_lora_files)
], mapping) ], mapping)
assert worker_lora_manager.list_loras() == {1, 2, 4, 5} assert worker_adapter_manager.list_adapters() == {1, 2, 4, 5}
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1 assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 2 assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 2
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 5 assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 5
assert worker_lora_manager._lora_manager.lora_index_to_id[3] == 4 assert worker_adapter_manager._adapter_manager.lora_index_to_id[3] == 4
worker_lora_manager.set_active_loras([ worker_adapter_manager.set_active_adapters([
LoRARequest("1", 1, sql_lora_files), LoRARequest("1", 1, sql_lora_files),
LoRARequest("1", 1, sql_lora_files), LoRARequest("1", 1, sql_lora_files),
LoRARequest("1", 1, sql_lora_files) LoRARequest("1", 1, sql_lora_files)
], mapping) ], mapping)
assert worker_lora_manager.list_loras() == {1, 2, 4, 5} assert worker_adapter_manager.list_adapters() == {1, 2, 4, 5}
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1 assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 2 assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 2
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 5 assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 5
assert worker_lora_manager._lora_manager.lora_index_to_id[3] == 4 assert worker_adapter_manager._adapter_manager.lora_index_to_id[3] == 4
worker_lora_manager.set_active_loras([ worker_adapter_manager.set_active_adapters([
LoRARequest("6", 6, sql_lora_files), LoRARequest("6", 6, sql_lora_files),
LoRARequest("7", 7, sql_lora_files), LoRARequest("7", 7, sql_lora_files),
LoRARequest("8", 8, sql_lora_files) LoRARequest("8", 8, sql_lora_files)
], mapping) ], mapping)
assert worker_lora_manager.list_loras() == {1, 6, 7, 8} assert worker_adapter_manager.list_adapters() == {1, 6, 7, 8}
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1 assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 7 assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 7
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 8 assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 8
assert worker_lora_manager._lora_manager.lora_index_to_id[3] == 6 assert worker_adapter_manager._adapter_manager.lora_index_to_id[3] == 6
# Over capacity # Over capacity
with pytest.raises(RuntimeError): with pytest.raises(RuntimeError):
worker_lora_manager.set_active_loras([ worker_adapter_manager.set_active_adapters([
LoRARequest("10", 10, sql_lora_files), LoRARequest("10", 10, sql_lora_files),
LoRARequest("11", 11, sql_lora_files), LoRARequest("11", 11, sql_lora_files),
LoRARequest("12", 12, sql_lora_files), LoRARequest("12", 12, sql_lora_files),
...@@ -362,68 +427,69 @@ def test_lru_cache_worker_lora_manager(llama_2_7b_model_extra_embeddings, ...@@ -362,68 +427,69 @@ def test_lru_cache_worker_lora_manager(llama_2_7b_model_extra_embeddings,
], mapping) ], mapping)
def test_worker_lora_manager(llama_2_7b_model_extra_embeddings, def test_worker_adapter_manager(llama_2_7b_model_extra_embeddings,
sql_lora_files): sql_lora_files):
# Should remove every LoRA not specified in the request. # Should remove every LoRA not specified in the request.
lora_config = LoRAConfig(max_lora_rank=8, max_cpu_loras=4, max_loras=4) lora_config = LoRAConfig(max_lora_rank=8, max_cpu_loras=4, max_loras=4)
worker_lora_manager = WorkerLoRAManager( worker_adapter_manager = WorkerLoRAManager(
4, 2, llama_2_7b_model_extra_embeddings.unpadded_vocab_size - 4, 2, llama_2_7b_model_extra_embeddings.unpadded_vocab_size -
lora_config.lora_extra_vocab_size, lora_config, torch.device("cuda"), lora_config.lora_extra_vocab_size, lora_config, torch.device("cuda"),
EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES) EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES)
worker_lora_manager.create_lora_manager(llama_2_7b_model_extra_embeddings) worker_adapter_manager.create_lora_manager(
llama_2_7b_model_extra_embeddings)
mapping = LoRAMapping([], []) mapping = LoRAMapping([], [])
worker_lora_manager.set_active_loras([ worker_adapter_manager.set_active_adapters([
LoRARequest("1", 1, sql_lora_files), LoRARequest("1", 1, sql_lora_files),
LoRARequest("2", 2, sql_lora_files) LoRARequest("2", 2, sql_lora_files)
], mapping) ], mapping)
assert worker_lora_manager.list_loras() == {1, 2} assert worker_adapter_manager.list_adapters() == {1, 2}
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1 assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 2 assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 2
worker_lora_manager.set_active_loras([ worker_adapter_manager.set_active_adapters([
LoRARequest("1", 1, sql_lora_files), LoRARequest("1", 1, sql_lora_files),
LoRARequest("3", 3, sql_lora_files), LoRARequest("3", 3, sql_lora_files),
LoRARequest("4", 4, sql_lora_files) LoRARequest("4", 4, sql_lora_files)
], mapping) ], mapping)
assert worker_lora_manager.list_loras() == {1, 3, 4} assert worker_adapter_manager.list_adapters() == {1, 3, 4}
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1 assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 3 assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 3
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 4 assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 4
worker_lora_manager.set_active_loras([ worker_adapter_manager.set_active_adapters([
LoRARequest("1", 1, sql_lora_files), LoRARequest("1", 1, sql_lora_files),
LoRARequest("2", 2, sql_lora_files), LoRARequest("2", 2, sql_lora_files),
LoRARequest("5", 5, sql_lora_files) LoRARequest("5", 5, sql_lora_files)
], mapping) ], mapping)
assert worker_lora_manager.list_loras() == {1, 2, 5} assert worker_adapter_manager.list_adapters() == {1, 2, 5}
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1 assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 2 assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 2
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 5 assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 5
worker_lora_manager.set_active_loras([ worker_adapter_manager.set_active_adapters([
LoRARequest("1", 1, sql_lora_files), LoRARequest("1", 1, sql_lora_files),
LoRARequest("1", 1, sql_lora_files), LoRARequest("1", 1, sql_lora_files),
LoRARequest("1", 1, sql_lora_files) LoRARequest("1", 1, sql_lora_files)
], mapping) ], mapping)
assert worker_lora_manager.list_loras() == {1} assert worker_adapter_manager.list_adapters() == {1}
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1 assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
assert worker_lora_manager._lora_manager.lora_index_to_id[1] is None assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] is None
assert worker_lora_manager._lora_manager.lora_index_to_id[2] is None assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] is None
worker_lora_manager.set_active_loras([ worker_adapter_manager.set_active_adapters([
LoRARequest("6", 6, sql_lora_files), LoRARequest("6", 6, sql_lora_files),
LoRARequest("7", 7, sql_lora_files), LoRARequest("7", 7, sql_lora_files),
LoRARequest("8", 8, sql_lora_files) LoRARequest("8", 8, sql_lora_files)
], mapping) ], mapping)
assert worker_lora_manager.list_loras() == {6, 7, 8} assert worker_adapter_manager.list_adapters() == {6, 7, 8}
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 8 assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 8
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 6 assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 6
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 7 assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 7
# Over capacity # Over capacity
with pytest.raises(RuntimeError): with pytest.raises(RuntimeError):
worker_lora_manager.set_active_loras([ worker_adapter_manager.set_active_adapters([
LoRARequest("10", 10, sql_lora_files), LoRARequest("10", 10, sql_lora_files),
LoRARequest("11", 11, sql_lora_files), LoRARequest("11", 11, sql_lora_files),
LoRARequest("12", 12, sql_lora_files), LoRARequest("12", 12, sql_lora_files),
...@@ -461,8 +527,8 @@ def test_packed_loras(dist_init, dummy_model_gate_up): ...@@ -461,8 +527,8 @@ def test_packed_loras(dist_init, dummy_model_gate_up):
assert isinstance(model.get_submodule("gate_up_proj"), assert isinstance(model.get_submodule("gate_up_proj"),
MergedColumnParallelLinearWithLoRA) MergedColumnParallelLinearWithLoRA)
assert manager.add_lora(model_lora) assert manager.add_adapter(model_lora)
assert manager.add_lora(model_lora1) assert manager.add_adapter(model_lora1)
packed_lora = model_lora.get_lora("gate_up_proj") packed_lora = model_lora.get_lora("gate_up_proj")
assert packed_lora and isinstance(packed_lora, PackedLoRALayerWeights) assert packed_lora and isinstance(packed_lora, PackedLoRALayerWeights)
......
from typing import List
import pytest import pytest
import torch import torch
...@@ -7,7 +9,7 @@ from vllm.lora.request import LoRARequest ...@@ -7,7 +9,7 @@ from vllm.lora.request import LoRARequest
MODEL_PATH = "mistralai/Mixtral-8x7B-Instruct-v0.1" MODEL_PATH = "mistralai/Mixtral-8x7B-Instruct-v0.1"
def do_sample(llm, lora_path: str, lora_id: int): def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
prompts = [ prompts = [
"[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nSpellForce 3 is a pretty bad game. The developer Grimlore Games is clearly a bunch of no-talent hacks, and 2017 was a terrible year for games anyway. [/user] [assistant]", # noqa: E501 "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nSpellForce 3 is a pretty bad game. The developer Grimlore Games is clearly a bunch of no-talent hacks, and 2017 was a terrible year for games anyway. [/user] [assistant]", # noqa: E501
"[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nI wanted to like Grimlore Games' 2017 entry, but in SpellForce 3 they just didn't get anything right. [/user] [assistant]", # noqa: E501 "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nI wanted to like Grimlore Games' 2017 entry, but in SpellForce 3 they just didn't get anything right. [/user] [assistant]", # noqa: E501
...@@ -20,7 +22,7 @@ def do_sample(llm, lora_path: str, lora_id: int): ...@@ -20,7 +22,7 @@ def do_sample(llm, lora_path: str, lora_id: int):
lora_request=LoRARequest(str(lora_id), lora_id, lora_path) lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
if lora_id else None) if lora_id else None)
# Print the outputs. # Print the outputs.
generated_texts = [] generated_texts: List[str] = []
for output in outputs: for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text.strip() generated_text = output.outputs[0].text.strip()
...@@ -38,14 +40,14 @@ def test_mixtral_lora(mixtral_lora_files, tp_size): ...@@ -38,14 +40,14 @@ def test_mixtral_lora(mixtral_lora_files, tp_size):
enable_lora=True, enable_lora=True,
max_num_seqs=16, max_num_seqs=16,
max_loras=4, max_loras=4,
distributed_executor_backend="ray",
tensor_parallel_size=tp_size) tensor_parallel_size=tp_size)
expected_lora_output = [ expected_lora_output = [
"give_opinion(name[SpellForce 3], release_year[2017], developer[Grimlore Games], rating[poor])", # noqa: E501 "give_opinion(name[SpellForce 3], release_year[2017], developer[Grimlore Games], rating[poor])", # noqa: E501
"give_opinion(name[SpellForce 3], release_year[2017], developer[Grimlore Games], rating[poor])", # noqa: E501 "give_opinion(name[SpellForce 3], developer[Grimlore Games], release_year[2017], rating[poor])", # noqa: E501
"inform(name[BioShock], release_year[2007], rating[good], genres[action-adventure, role-playing, shooter], platforms[PlayStation, Xbox, PC], available_on_steam[yes], has_linux_release[no], has_mac_release[yes])", # noqa: E501 "inform(name[BioShock], release_year[2007], rating[good], genres[action-adventure, role-playing, shooter], platforms[PlayStation, Xbox, PC], available_on_steam[yes], has_linux_release[no], has_mac_release[yes])", # noqa: E501
] ]
assert do_sample(llm, mixtral_lora_files, assert do_sample(llm, mixtral_lora_files,
lora_id=1) == expected_lora_output lora_id=1) == expected_lora_output
assert do_sample(llm, mixtral_lora_files, assert do_sample(llm, mixtral_lora_files,
......
from typing import List
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
...@@ -6,7 +8,7 @@ MODEL_PATH = "microsoft/phi-2" ...@@ -6,7 +8,7 @@ MODEL_PATH = "microsoft/phi-2"
PROMPT_TEMPLATE = "### Instruct: {sql_prompt}\n\n### Context: {context}\n\n### Output:" # noqa: E501 PROMPT_TEMPLATE = "### Instruct: {sql_prompt}\n\n### Context: {context}\n\n### Output:" # noqa: E501
def do_sample(llm, lora_path: str, lora_id: int) -> str: def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
prompts = [ prompts = [
PROMPT_TEMPLATE.format( PROMPT_TEMPLATE.format(
sql_prompt= sql_prompt=
...@@ -35,7 +37,7 @@ def do_sample(llm, lora_path: str, lora_id: int) -> str: ...@@ -35,7 +37,7 @@ def do_sample(llm, lora_path: str, lora_id: int) -> str:
if lora_id else None, if lora_id else None,
) )
# Print the outputs. # Print the outputs.
generated_texts = [] generated_texts: List[str] = []
for output in outputs: for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text.strip() generated_text = output.outputs[0].text.strip()
......
...@@ -49,39 +49,60 @@ H1 = H2 = [ ...@@ -49,39 +49,60 @@ H1 = H2 = [
128, 128,
256, 256,
512, 512,
896,
1024, 1024,
1152, 1152,
1216,
1280, 1280,
1536, 1536,
1664,
2048, 2048,
2240,
2304, 2304,
2368,
2432,
2560, 2560,
2752, 2752,
3072, 3072,
3328, 3328,
3456, 3456,
3584, 3584,
3712,
4096, 4096,
4480,
4608, 4608,
4736,
4864,
5120, 5120,
5504, 5504,
5632, 5632,
5888,
6144, 6144,
6400, 6400,
6848, 6848,
6912, 6912,
7168, 7168,
7424,
8192, 8192,
8960,
9216, 9216,
9472,
10240, 10240,
11008, 11008,
11264,
13824, 13824,
14336, 14336,
14784,
14848,
15360, 15360,
18944,
22016, 22016,
22528,
24576, 24576,
27392, 27392,
27648, 27648,
29568,
29696,
32000, 32000,
32256, 32256,
32512, 32512,
...@@ -90,6 +111,9 @@ H1 = H2 = [ ...@@ -90,6 +111,9 @@ H1 = H2 = [
36864, 36864,
43264, 43264,
49152, 49152,
49408,
60544,
60672,
64000, 64000,
64256, 64256,
102400, 102400,
......
...@@ -25,7 +25,10 @@ MODELS: List[ModelWithQuantization] = [ ...@@ -25,7 +25,10 @@ MODELS: List[ModelWithQuantization] = [
] ]
def do_sample(llm, lora_path: str, lora_id: int, max_tokens=256): def do_sample(llm: vllm.LLM,
lora_path: str,
lora_id: int,
max_tokens: int = 256) -> List[str]:
raw_prompts = [ raw_prompts = [
"Give me an orange-ish brown color", "Give me an orange-ish brown color",
"Give me a neon pink color", "Give me a neon pink color",
...@@ -45,7 +48,7 @@ def do_sample(llm, lora_path: str, lora_id: int, max_tokens=256): ...@@ -45,7 +48,7 @@ def do_sample(llm, lora_path: str, lora_id: int, max_tokens=256):
lora_request=LoRARequest(str(lora_id), lora_id, lora_path) lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
if lora_id else None) if lora_id else None)
# Print the outputs. # Print the outputs.
generated_texts = [] generated_texts: List[str] = []
for output in outputs: for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text generated_text = output.outputs[0].text
......
from typing import List, Optional from typing import Dict, List, Optional
import torch import torch
...@@ -9,13 +9,13 @@ class DummyLoRAManager: ...@@ -9,13 +9,13 @@ class DummyLoRAManager:
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self._loras = {} self._loras: Dict[str, LoRALayerWeights] = {}
def set_module_lora(self, module_name: str, lora: LoRALayerWeights): def set_module_lora(self, module_name: str, lora: LoRALayerWeights):
self._loras[module_name] = lora self._loras[module_name] = lora
def get_module_lora(self, module_name: str) -> Optional[LoRALayerWeights]: def get_module_lora(self, module_name: str) -> LoRALayerWeights:
return self._loras.get(module_name, None) return self._loras[module_name]
def init_random_lora(self, def init_random_lora(self,
module_name: str, module_name: str,
...@@ -68,11 +68,11 @@ class DummyLoRAManager: ...@@ -68,11 +68,11 @@ class DummyLoRAManager:
module_name: str, module_name: str,
input_dim: int, input_dim: int,
output_dims: List[int], output_dims: List[int],
noop_lora_index: List[int] = None, noop_lora_index: Optional[List[int]] = None,
rank=8, rank: int = 8,
): ):
base_loras = [] base_loras: List[LoRALayerWeights] = []
noop_lora_index = set(noop_lora_index or []) noop_lora_index_set = set(noop_lora_index or [])
for i, out_dim in enumerate(output_dims): for i, out_dim in enumerate(output_dims):
base_lora = self.init_lora( base_lora = self.init_lora(
...@@ -80,7 +80,7 @@ class DummyLoRAManager: ...@@ -80,7 +80,7 @@ class DummyLoRAManager:
input_dim, input_dim,
out_dim, out_dim,
rank=rank, rank=rank,
noop=i in noop_lora_index, noop=i in noop_lora_index_set,
) )
base_loras.append(base_lora) base_loras.append(base_lora)
packed_lora = PackedLoRALayerWeights.pack(base_loras) packed_lora = PackedLoRALayerWeights.pack(base_loras)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment