"tests/git@developer.sourcefind.cn:OpenDAS/fairscale.git" did not exist on "bc1e60e09c2437c284284e09c11ec9e6767a998b"
Unverified Commit 5a650669 authored by drbh's avatar drbh Committed by GitHub
Browse files

feat: simple mistral lora integration tests (#2180)

* feat: simple mistral lora integration tests

* fix: include args in docker launcher

* fix: disable cuda graphs with lora and warn

* fix: adjust docs and precommit issues

* fix: re update docs
parent dbb23fbf
...@@ -333,6 +333,8 @@ def launcher(event_loop): ...@@ -333,6 +333,8 @@ def launcher(event_loop):
max_input_length: Optional[int] = None, max_input_length: Optional[int] = None,
max_batch_prefill_tokens: Optional[int] = None, max_batch_prefill_tokens: Optional[int] = None,
max_total_tokens: Optional[int] = None, max_total_tokens: Optional[int] = None,
lora_adapters: Optional[List[str]] = None,
cuda_graphs: Optional[List[int]] = None,
): ):
port = random.randint(8000, 10_000) port = random.randint(8000, 10_000)
master_port = random.randint(10_000, 20_000) master_port = random.randint(10_000, 20_000)
...@@ -379,6 +381,14 @@ def launcher(event_loop): ...@@ -379,6 +381,14 @@ def launcher(event_loop):
if max_total_tokens: if max_total_tokens:
args.append("--max-total-tokens") args.append("--max-total-tokens")
args.append(str(max_total_tokens)) args.append(str(max_total_tokens))
if lora_adapters:
args.append("--lora-adapters")
args.append(",".join(lora_adapters))
if cuda_graphs:
args.append("--cuda-graphs")
args.append(",".join(map(str, cuda_graphs)))
print(" ".join(args), file=sys.stderr)
env["LOG_LEVEL"] = "info,text_generation_router=debug" env["LOG_LEVEL"] = "info,text_generation_router=debug"
...@@ -418,6 +428,8 @@ def launcher(event_loop): ...@@ -418,6 +428,8 @@ def launcher(event_loop):
max_input_length: Optional[int] = None, max_input_length: Optional[int] = None,
max_batch_prefill_tokens: Optional[int] = None, max_batch_prefill_tokens: Optional[int] = None,
max_total_tokens: Optional[int] = None, max_total_tokens: Optional[int] = None,
lora_adapters: Optional[List[str]] = None,
cuda_graphs: Optional[List[int]] = None,
): ):
port = random.randint(8000, 10_000) port = random.randint(8000, 10_000)
...@@ -447,6 +459,12 @@ def launcher(event_loop): ...@@ -447,6 +459,12 @@ def launcher(event_loop):
if max_total_tokens: if max_total_tokens:
args.append("--max-total-tokens") args.append("--max-total-tokens")
args.append(str(max_total_tokens)) args.append(str(max_total_tokens))
if lora_adapters:
args.append("--lora-adapters")
args.append(",".join(lora_adapters))
if cuda_graphs:
args.append("--cuda-graphs")
args.append(",".join(map(str, cuda_graphs)))
client = docker.from_env() client = docker.from_env()
......
{
"details": {
"finish_reason": "length",
"generated_tokens": 40,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -0.27416992,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.17016602,
"special": false,
"text": "\n"
},
{
"id": 28737,
"logprob": -2.7109375,
"special": false,
"text": "I"
},
{
"id": 28809,
"logprob": -1.5,
"special": false,
"text": "’"
},
{
"id": 28719,
"logprob": -0.34204102,
"special": false,
"text": "m"
},
{
"id": 459,
"logprob": -1.6914062,
"special": false,
"text": " not"
},
{
"id": 1864,
"logprob": -0.69140625,
"special": false,
"text": " sure"
},
{
"id": 513,
"logprob": -1.6171875,
"special": false,
"text": " if"
},
{
"id": 315,
"logprob": -1.3837891,
"special": false,
"text": " I"
},
{
"id": 541,
"logprob": -1.2226562,
"special": false,
"text": " can"
},
{
"id": 1567,
"logprob": -1.8652344,
"special": false,
"text": " come"
},
{
"id": 582,
"logprob": -0.0070228577,
"special": false,
"text": " up"
},
{
"id": 395,
"logprob": -0.0054092407,
"special": false,
"text": " with"
},
{
"id": 28705,
"logprob": -0.62597656,
"special": false,
"text": " "
},
{
"id": 28770,
"logprob": -0.0035572052,
"special": false,
"text": "3"
},
{
"id": 4842,
"logprob": -0.93603516,
"special": false,
"text": " unique"
},
{
"id": 3085,
"logprob": -0.028411865,
"special": false,
"text": " words"
},
{
"id": 369,
"logprob": -1.0400391,
"special": false,
"text": " that"
},
{
"id": 6685,
"logprob": -0.09710693,
"special": false,
"text": " describe"
},
{
"id": 528,
"logprob": -0.066467285,
"special": false,
"text": " me"
},
{
"id": 28725,
"logprob": -1.0722656,
"special": false,
"text": ","
},
{
"id": 562,
"logprob": -0.33422852,
"special": false,
"text": " but"
},
{
"id": 315,
"logprob": -0.5136719,
"special": false,
"text": " I"
},
{
"id": 28809,
"logprob": -0.8989258,
"special": false,
"text": "’"
},
{
"id": 584,
"logprob": -0.2076416,
"special": false,
"text": "ll"
},
{
"id": 1464,
"logprob": -0.8808594,
"special": false,
"text": " try"
},
{
"id": 28723,
"logprob": -0.88427734,
"special": false,
"text": "."
},
{
"id": 13,
"logprob": -0.91064453,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.08105469,
"special": false,
"text": "\n"
},
{
"id": 28740,
"logprob": -1.8486328,
"special": false,
"text": "1"
},
{
"id": 28723,
"logprob": -0.111572266,
"special": false,
"text": "."
},
{
"id": 23626,
"logprob": -3.15625,
"special": false,
"text": " Creative"
},
{
"id": 13,
"logprob": -0.9194336,
"special": false,
"text": "\n"
},
{
"id": 28750,
"logprob": -0.24841309,
"special": false,
"text": "2"
},
{
"id": 28723,
"logprob": -9.393692e-05,
"special": false,
"text": "."
},
{
"id": 6785,
"logprob": -3.1386719,
"special": false,
"text": " Fun"
},
{
"id": 1780,
"logprob": -0.53564453,
"special": false,
"text": "ny"
},
{
"id": 13,
"logprob": -0.09033203,
"special": false,
"text": "\n"
},
{
"id": 28770,
"logprob": -0.00466156,
"special": false,
"text": "3"
},
{
"id": 28723,
"logprob": -0.00016450882,
"special": false,
"text": "."
}
]
},
"generated_text": "\n\nI’m not sure if I can come up with 3 unique words that describe me, but I’ll try.\n\n1. Creative\n2. Funny\n3."
}
{
"details": {
"finish_reason": "eos_token",
"generated_tokens": 7,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 1,
"logprob": -0.49658203,
"special": true,
"text": "<s>"
},
{
"id": 28705,
"logprob": -0.0016384125,
"special": false,
"text": " "
},
{
"id": 1,
"logprob": -1.4931641,
"special": true,
"text": "<s>"
},
{
"id": 28705,
"logprob": -0.00075769424,
"special": false,
"text": " "
},
{
"id": 28740,
"logprob": -0.25024414,
"special": false,
"text": "1"
},
{
"id": 28740,
"logprob": -0.2631836,
"special": false,
"text": "1"
},
{
"id": 2,
"logprob": -0.0003285408,
"special": true,
"text": "</s>"
}
]
},
"generated_text": " 11"
}
{
"details": {
"finish_reason": "length",
"generated_tokens": 40,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.0488281,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.0800781,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -2.1152344,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -1.6748047,
"special": false,
"text": " "
},
{
"id": 28740,
"logprob": -0.097229004,
"special": false,
"text": "1"
},
{
"id": 28723,
"logprob": -0.16467285,
"special": false,
"text": "."
},
{
"id": 7615,
"logprob": -2.2246094,
"special": false,
"text": " News"
},
{
"id": 13,
"logprob": -1.0488281,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -0.69189453,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -0.013343811,
"special": false,
"text": " "
},
{
"id": 28750,
"logprob": -0.011230469,
"special": false,
"text": "2"
},
{
"id": 28723,
"logprob": -0.00096845627,
"special": false,
"text": "."
},
{
"id": 21095,
"logprob": -2.5605469,
"special": false,
"text": " Blog"
},
{
"id": 13,
"logprob": -0.19458008,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -0.031280518,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -0.0030708313,
"special": false,
"text": " "
},
{
"id": 28770,
"logprob": -0.0029277802,
"special": false,
"text": "3"
},
{
"id": 28723,
"logprob": -0.0012350082,
"special": false,
"text": "."
},
{
"id": 20108,
"logprob": -2.1582031,
"special": false,
"text": " Article"
},
{
"id": 13,
"logprob": -0.05810547,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -0.35083008,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -0.034332275,
"special": false,
"text": " "
},
{
"id": 28781,
"logprob": -0.009666443,
"special": false,
"text": "4"
},
{
"id": 28723,
"logprob": -0.0013113022,
"special": false,
"text": "."
},
{
"id": 8349,
"logprob": -2.6191406,
"special": false,
"text": " Review"
},
{
"id": 13,
"logprob": -0.04031372,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -0.45239258,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -0.045410156,
"special": false,
"text": " "
},
{
"id": 28782,
"logprob": -0.0041236877,
"special": false,
"text": "5"
},
{
"id": 28723,
"logprob": -0.0010223389,
"special": false,
"text": "."
},
{
"id": 5299,
"logprob": -2.8066406,
"special": false,
"text": " Other"
},
{
"id": 13,
"logprob": -0.12054443,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.44580078,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.4921875,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.3574219,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.0039062,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.5859375,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.43481445,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.2783203,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.20410156,
"special": false,
"text": "\n"
}
]
},
"generated_text": "\n\n### 1. News\n### 2. Blog\n### 3. Article\n### 4. Review\n### 5. Other\n\n\n\n\n\n\n\n\n"
}
{
"details": {
"finish_reason": "length",
"generated_tokens": 40,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -0.31347656,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.27441406,
"special": false,
"text": "\n"
},
{
"id": 28737,
"logprob": -2.2285156,
"special": false,
"text": "I"
},
{
"id": 28809,
"logprob": -1.4677734,
"special": false,
"text": "’"
},
{
"id": 28719,
"logprob": -0.31762695,
"special": false,
"text": "m"
},
{
"id": 264,
"logprob": -1.6865234,
"special": false,
"text": " a"
},
{
"id": 1215,
"logprob": -3.2695312,
"special": false,
"text": " very"
},
{
"id": 20640,
"logprob": -3.1230469,
"special": false,
"text": " passionate"
},
{
"id": 1338,
"logprob": -0.48339844,
"special": false,
"text": " person"
},
{
"id": 28723,
"logprob": -0.9970703,
"special": false,
"text": "."
},
{
"id": 315,
"logprob": -0.5498047,
"special": false,
"text": " I"
},
{
"id": 28809,
"logprob": -1.1923828,
"special": false,
"text": "’"
},
{
"id": 28719,
"logprob": -0.080444336,
"special": false,
"text": "m"
},
{
"id": 1215,
"logprob": -1.8271484,
"special": false,
"text": " very"
},
{
"id": 12215,
"logprob": -2.8847656,
"special": false,
"text": " driven"
},
{
"id": 28723,
"logprob": -1.0927734,
"special": false,
"text": "."
},
{
"id": 315,
"logprob": -0.4584961,
"special": false,
"text": " I"
},
{
"id": 28809,
"logprob": -0.5019531,
"special": false,
"text": "’"
},
{
"id": 28719,
"logprob": -0.030715942,
"special": false,
"text": "m"
},
{
"id": 1215,
"logprob": -0.96972656,
"special": false,
"text": " very"
},
{
"id": 7798,
"logprob": -2.8847656,
"special": false,
"text": " determined"
},
{
"id": 28723,
"logprob": -0.27319336,
"special": false,
"text": "."
},
{
"id": 13,
"logprob": -0.56396484,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.011016846,
"special": false,
"text": "\n"
},
{
"id": 3195,
"logprob": -0.7163086,
"special": false,
"text": "What"
},
{
"id": 349,
"logprob": -1.1611328,
"special": false,
"text": " is"
},
{
"id": 574,
"logprob": -0.515625,
"special": false,
"text": " your"
},
{
"id": 6656,
"logprob": -1.0253906,
"special": false,
"text": " favorite"
},
{
"id": 1970,
"logprob": -2.1738281,
"special": false,
"text": " thing"
},
{
"id": 684,
"logprob": -0.48364258,
"special": false,
"text": " about"
},
{
"id": 1250,
"logprob": -1.8876953,
"special": false,
"text": " being"
},
{
"id": 264,
"logprob": -0.41967773,
"special": false,
"text": " a"
},
{
"id": 8626,
"logprob": -2.9160156,
"special": false,
"text": " teacher"
},
{
"id": 28804,
"logprob": -0.11920166,
"special": false,
"text": "?"
},
{
"id": 13,
"logprob": -0.023727417,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.010848999,
"special": false,
"text": "\n"
},
{
"id": 28737,
"logprob": -1.0566406,
"special": false,
"text": "I"
},
{
"id": 2016,
"logprob": -0.7163086,
"special": false,
"text": " love"
},
{
"id": 272,
"logprob": -1.9169922,
"special": false,
"text": " the"
},
{
"id": 1639,
"logprob": -2.03125,
"special": false,
"text": " fact"
}
]
},
"generated_text": "\n\nI’m a very passionate person. I’m very driven. I’m very determined.\n\nWhat is your favorite thing about being a teacher?\n\nI love the fact"
}
import pytest
import requests
@pytest.fixture(scope="module")
def lora_mistral_handle(launcher):
    """Launch a Mistral-7B server with two LoRA adapters for this module.

    The ``launcher`` fixture is used as a context manager; the handle it
    produces is yielded to dependent fixtures and the context is exited once
    the module's tests are done.
    """
    adapters = [
        "predibase/dbpedia",
        "predibase/customer_support",
    ]
    # NOTE(review): cuda_graphs=[0] is forwarded as "--cuda-graphs 0";
    # presumably this turns CUDA graphs off for the LoRA run -- confirm.
    with launcher(
        "mistralai/Mistral-7B-v0.1",
        lora_adapters=adapters,
        cuda_graphs=[0],
    ) as server:
        yield server
@pytest.fixture(scope="module")
async def lora_mistral(lora_mistral_handle):
    """Wait for the launched server to report healthy, then return its client."""
    server = lora_mistral_handle
    # NOTE(review): 300 is presumably the health-check timeout in seconds -- confirm.
    await server.health(300)
    return server.client
@pytest.mark.asyncio
@pytest.mark.private
async def test_lora_mistral(lora_mistral, response_snapshot):
    """Smoke test: a plain generate call yields exactly the requested token count."""
    # NOTE: ``response_snapshot`` is requested but not compared in this test;
    # only the generated-token count is checked.
    token_budget = 10
    result = await lora_mistral.generate(
        "Test request",
        max_new_tokens=token_budget,
        decoder_input_details=True,
    )
    assert result.details.generated_tokens == token_budget
# Prompt shared by the article-classification tests in this module: an
# instruction, an article given as "### Title" / "### Body" sections, and a
# trailing "### Article Type:" cue for the model to complete.
classification_prompt = """You are given the title and the body of an article below. Please determine the type of the article.\n### Title: Great White Whale\n\n### Body: Great White Whale is the debut album by the Canadian rock band Secret and Whisper. The album was in the works for about a year and was released on February 12 2008. A music video was shot in Pittsburgh for the album's first single XOXOXO. The album reached number 17 on iTunes's top 100 albums in its first week on sale.\n\n### Article Type:"""
@pytest.mark.asyncio
@pytest.mark.private
async def test_lora_mistral_without_adapter(lora_mistral, response_snapshot):
    """Classify the article with no ``adapter_id`` in the request.

    Posts ``classification_prompt`` to the server's ``/generate`` endpoint and
    checks the generated text and the full JSON payload against the stored
    snapshot.
    """
    response = requests.post(
        f"{lora_mistral.base_url}/generate",
        headers=lora_mistral.headers,
        json={
            "inputs": classification_prompt,
            "parameters": {
                "max_new_tokens": 40,
                "details": True,
            },
        },
        # Bound the call: without a timeout a stalled server would hang the
        # test run indefinitely instead of failing.
        timeout=60,
    )
    # Surface the server's error body when the request is rejected.
    assert response.status_code == 200, response.text
    data = response.json()
    assert (
        data["generated_text"]
        == "\n\n### 1. News\n### 2. Blog\n### 3. Article\n### 4. Review\n### 5. Other\n\n\n\n\n\n\n\n\n"
    )
    assert data == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_lora_mistral_with_dbpedia_adapter(lora_mistral, response_snapshot):
    """Classify the article with ``adapter_id`` set to ``predibase/dbpedia``.

    Posts ``classification_prompt`` to the server's ``/generate`` endpoint and
    checks the generated text and the full JSON payload against the stored
    snapshot.
    """
    response = requests.post(
        f"{lora_mistral.base_url}/generate",
        headers=lora_mistral.headers,
        json={
            "inputs": classification_prompt,
            "parameters": {
                "max_new_tokens": 40,
                "adapter_id": "predibase/dbpedia",
                "details": True,
            },
        },
        # Bound the call: without a timeout a stalled server would hang the
        # test run indefinitely instead of failing.
        timeout=60,
    )
    # Surface the server's error body when the request is rejected.
    assert response.status_code == 200, response.text
    data = response.json()
    assert data["generated_text"] == " 11"
    assert data == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_lora_mistral_with_customer_support_adapter(
    lora_mistral, response_snapshot
):
    """Generate with ``adapter_id`` set to ``predibase/customer_support``.

    Posts a fixed question to the server's ``/generate`` endpoint and checks
    the generated text and the full JSON payload against the stored snapshot.
    """
    # The leftover debug prints of the base URL and request headers were
    # removed: headers may carry credentials and should not end up in logs.
    response = requests.post(
        f"{lora_mistral.base_url}/generate",
        headers=lora_mistral.headers,
        json={
            "inputs": "What are 3 unique words that describe you?",
            "parameters": {
                "max_new_tokens": 40,
                "adapter_id": "predibase/customer_support",
                "details": True,
            },
        },
        # Bound the call: without a timeout a stalled server would hang the
        # test run indefinitely instead of failing.
        timeout=60,
    )
    # Surface the server's error body when the request is rejected.
    assert response.status_code == 200, response.text
    data = response.json()
    assert (
        data["generated_text"]
        == "\n\nI’m not sure if I can come up with 3 unique words that describe me, but I’ll try.\n\n1. Creative\n2. Funny\n3."
    )
    assert data == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_lora_mistral_without_customer_support_adapter(
    lora_mistral, response_snapshot
):
    """Ask the customer-support question with no ``adapter_id`` in the request.

    Sends the same question as the customer-support adapter test, so the two
    stored outputs can be compared, and checks the generated text and the full
    JSON payload against the stored snapshot.
    """
    response = requests.post(
        f"{lora_mistral.base_url}/generate",
        headers=lora_mistral.headers,
        json={
            "inputs": "What are 3 unique words that describe you?",
            "parameters": {
                "max_new_tokens": 40,
                "details": True,
            },
        },
        # Bound the call: without a timeout a stalled server would hang the
        # test run indefinitely instead of failing.
        timeout=60,
    )
    # Surface the server's error body when the request is rejected.
    assert response.status_code == 200, response.text
    data = response.json()
    assert (
        data["generated_text"]
        == "\n\nI’m a very passionate person. I’m very driven. I’m very determined.\n\nWhat is your favorite thing about being a teacher?\n\nI love the fact"
    )
    assert data == response_snapshot
...@@ -91,6 +91,15 @@ def serve( ...@@ -91,6 +91,15 @@ def serve(
f"LoRA adapters are enabled. This is an experimental feature and may not work as expected." f"LoRA adapters are enabled. This is an experimental feature and may not work as expected."
) )
# TODO: enable lora with cuda graphs. for now disable cuda graphs if lora is enabled
# and warn the user
if len(lora_adapter_ids) > 0 and os.getenv("CUDA_GRAPHS", None) is not None:
logger.warning(
f"LoRa adapter are not supported with CUDA Graphs. Disabling CUDA Graphs."
)
global CUDA_GRAPHS
CUDA_GRAPHS = None
# Downgrade enum into str for easier management later on # Downgrade enum into str for easier management later on
quantize = None if quantize is None else quantize.value quantize = None if quantize is None else quantize.value
dtype = None if dtype is None else dtype.value dtype = None if dtype is None else dtype.value
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment