Unverified commit f6af3a65, authored by Lianmin Zheng and committed by GitHub
Browse files

Cleanup readme, llava examples, usage examples and nccl init (#1194)

parent c9064e6f
...@@ -59,7 +59,7 @@ class TestEmbeddingModels(unittest.TestCase): ...@@ -59,7 +59,7 @@ class TestEmbeddingModels(unittest.TestCase):
tolerance = 1e-2 tolerance = 1e-2
assert torch.all( assert torch.all(
abs(similarities - 1) < tolerance abs(similarities - 1) < tolerance
), f"embeddings not all close" ), "embeddings are not all close"
def test_prefill_logits(self): def test_prefill_logits(self):
for model, tp_size in MODELS: for model, tp_size in MODELS:
......
...@@ -59,7 +59,7 @@ class TestGenerationModels(unittest.TestCase): ...@@ -59,7 +59,7 @@ class TestGenerationModels(unittest.TestCase):
tolerance = 3e-2 tolerance = 3e-2
assert torch.all( assert torch.all(
abs(hf_logprobs - srt_logprobs) < tolerance abs(hf_logprobs - srt_logprobs) < tolerance
), f"prefill logprobs not all close" ), "prefill logprobs are not all close"
print(hf_outputs.output_strs) print(hf_outputs.output_strs)
print(srt_outputs.output_strs) print(srt_outputs.output_strs)
......
...@@ -14,7 +14,7 @@ suites = { ...@@ -14,7 +14,7 @@ suites = {
"test_torch_compile.py", "test_torch_compile.py",
"test_triton_attn_backend.py", "test_triton_attn_backend.py",
"test_vision_openai_server.py", "test_vision_openai_server.py",
"test_large_max_new_tokens.py", "test_update_weights.py",
"models/test_generation_models.py", "models/test_generation_models.py",
"models/test_embedding_models.py", "models/test_embedding_models.py",
"sampling/penaltylib", "sampling/penaltylib",
......
...@@ -2,8 +2,6 @@ import base64 ...@@ -2,8 +2,6 @@ import base64
import io import io
import json import json
import os import os
import sys
import time
import unittest import unittest
import numpy as np import numpy as np
...@@ -12,12 +10,10 @@ import requests ...@@ -12,12 +10,10 @@ import requests
from decord import VideoReader, cpu from decord import VideoReader, cpu
from PIL import Image from PIL import Image
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server
# python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --tokenizer-path lmms-lab/llavanext-qwen-siglip-tokenizer --port=30000 --host=127.0.0.1 --tp-size=8 --chat-template=chatml-llava --chunked-prefill-size=16384
class TestOpenAIVisionServer(unittest.TestCase): class TestOpenAIVisionServer(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
...@@ -32,11 +28,9 @@ class TestOpenAIVisionServer(unittest.TestCase): ...@@ -32,11 +28,9 @@ class TestOpenAIVisionServer(unittest.TestCase):
other_args=[ other_args=[
"--chat-template", "--chat-template",
"chatml-llava", "chatml-llava",
"--tokenizer-path",
"lmms-lab/llavanext-qwen-siglip-tokenizer",
"--chunked-prefill-size", "--chunked-prefill-size",
"16384", "16384",
"--log-requests", # "--log-requests",
], ],
) )
cls.base_url += "/v1" cls.base_url += "/v1"
...@@ -132,7 +126,6 @@ class TestOpenAIVisionServer(unittest.TestCase): ...@@ -132,7 +126,6 @@ class TestOpenAIVisionServer(unittest.TestCase):
messages = self.prepare_video_messages(file_path) messages = self.prepare_video_messages(file_path)
start_time = time.time()
video_request = client.chat.completions.create( video_request = client.chat.completions.create(
model="default", model="default",
messages=messages, messages=messages,
...@@ -140,15 +133,14 @@ class TestOpenAIVisionServer(unittest.TestCase): ...@@ -140,15 +133,14 @@ class TestOpenAIVisionServer(unittest.TestCase):
max_tokens=1024, max_tokens=1024,
stream=True, stream=True,
) )
print("-" * 30) print("-" * 30)
video_response = "" video_response = ""
for chunk in video_request: for chunk in video_request:
if chunk.choices[0].delta.content is not None: if chunk.choices[0].delta.content is not None:
content = chunk.choices[0].delta.content content = chunk.choices[0].delta.content
video_response += content video_response += content
sys.stdout.write(content) print(content, end="", flush=True)
sys.stdout.flush()
print("-" * 30) print("-" * 30)
# Add assertions to validate the video response # Add assertions to validate the video response
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment