Unverified commit f6af3a65 authored by Lianmin Zheng, committed by GitHub

Cleanup readme, llava examples, usage examples and nccl init (#1194)

parent c9064e6f
@@ -59,7 +59,7 @@ class TestEmbeddingModels(unittest.TestCase):
tolerance = 1e-2
assert torch.all(
abs(similarities - 1) < tolerance
), f"embeddings not all close"
), "embeddings are not all close"
def test_prefill_logits(self):
for model, tp_size in MODELS:
......
@@ -59,7 +59,7 @@ class TestGenerationModels(unittest.TestCase):
tolerance = 3e-2
assert torch.all(
abs(hf_logprobs - srt_logprobs) < tolerance
), f"prefill logprobs not all close"
), "prefill logprobs are not all close"
print(hf_outputs.output_strs)
print(srt_outputs.output_strs)
......
@@ -14,7 +14,7 @@ suites = {
"test_torch_compile.py",
"test_triton_attn_backend.py",
"test_vision_openai_server.py",
"test_large_max_new_tokens.py",
"test_update_weights.py",
"models/test_generation_models.py",
"models/test_embedding_models.py",
"sampling/penaltylib",
......
@@ -2,8 +2,6 @@ import base64
import io
import json
import os
import sys
import time
import unittest
import numpy as np
@@ -12,12 +10,10 @@ import requests
from decord import VideoReader, cpu
from PIL import Image
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server
# python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --tokenizer-path lmms-lab/llavanext-qwen-siglip-tokenizer --port=30000 --host=127.0.0.1 --tp-size=8 --chat-template=chatml-llava --chunked-prefill-size=16384
class TestOpenAIVisionServer(unittest.TestCase):
@classmethod
def setUpClass(cls):
@@ -32,11 +28,9 @@ class TestOpenAIVisionServer(unittest.TestCase):
other_args=[
"--chat-template",
"chatml-llava",
"--tokenizer-path",
"lmms-lab/llavanext-qwen-siglip-tokenizer",
"--chunked-prefill-size",
"16384",
"--log-requests",
# "--log-requests",
],
)
cls.base_url += "/v1"
@@ -132,7 +126,6 @@ class TestOpenAIVisionServer(unittest.TestCase):
messages = self.prepare_video_messages(file_path)
start_time = time.time()
video_request = client.chat.completions.create(
model="default",
messages=messages,
@@ -140,15 +133,14 @@ class TestOpenAIVisionServer(unittest.TestCase):
max_tokens=1024,
stream=True,
)
print("-" * 30)
video_response = ""
for chunk in video_request:
if chunk.choices[0].delta.content is not None:
content = chunk.choices[0].delta.content
video_response += content
sys.stdout.write(content)
sys.stdout.flush()
print(content, end="", flush=True)
print("-" * 30)
# Add assertions to validate the video response
......
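The last hunk replaces the `sys.stdout.write(...)` / `sys.stdout.flush()` calls in the streaming loop with `print(content, end="", flush=True)`, which is why `import sys` can be dropped. Below is a minimal standalone sketch of that streaming pattern using the OpenAI Python client, assuming an OpenAI-compatible sglang server is already running; the base URL and `api_key="EMPTY"` are placeholder assumptions, while `model="default"` and the guard on `delta.content` mirror the test code in this diff.

```python
from openai import OpenAI

# Assumed local endpoint; adjust to wherever sglang.launch_server is listening.
client = OpenAI(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")

stream = client.chat.completions.create(
    model="default",
    messages=[{"role": "user", "content": "Describe this video."}],
    max_tokens=1024,
    stream=True,
)

response = ""
for chunk in stream:
    # Each chunk carries an incremental delta; content may be None on the
    # final chunk, so guard before appending or printing.
    content = chunk.choices[0].delta.content
    if content is not None:
        response += content
        print(content, end="", flush=True)
print()
```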